lgloege/database.json Secret

## database.json
[
    {
        "name": "sst.wkmean.1990-present.nc",
        "cid": "bafybeie6rzt3d23ytcxyjjmmk2vjx73fk72zeauaotrsecyc37z6x63lxu"
    },
    {
        "name": "sst.wkmean.1990-present_consolidated.zarr",
        "cid": "bafybeiepna7ilkhdwykd65i7aovmrapnmaifullesxg5pdsjy6cos5qfrm"
    },
    {
        "name": "sst.wkmean.1990-present_unconsolidated.zarr",
        "cid": "bafybeiespqdrxcdu4nxjqzsktgsgh2k4voggnfexqcdnjhnxk3xc2xn6he"
    }
]

## mongodb_example.py
import json
from urllib.parse import quote_plus

import pymongo
import xarray as xr

# Connect to MongoDB.
# Placeholder credentials: substitute real values before running.
USERNAME = "username"
PASSWORD = "password"
# Percent-escape the credentials so that characters such as '@', ':' or '/'
# in a real username/password cannot corrupt the connection URI.
conn = (
    f'mongodb+srv://{quote_plus(USERNAME)}:{quote_plus(PASSWORD)}'
    '@cluster0.ibnvm.mongodb.net/?retryWrites=true&w=majority'
)
cluster = pymongo.MongoClient(conn)

# Select the database named "ipfs" and its collection named "repository".
db = cluster.ipfs
coll = db['repository']

# Load the list of {"name", "cid"} records from the JSON file.
with open('./database.json', encoding='utf-8') as f:
    data = json.load(f)

# Insert the records into the collection.
# NOTE(review): every run inserts the records again; nothing in this script
# guards against duplicates.
coll.insert_many(data)

# Query the database for the CID of the first record whose name contains
# "_consolidated". find_one() fetches a single document instead of
# materialising every match just to take the first one.
record = coll.find_one({"name": {'$regex': '_consolidated'}})
if record is None:
    raise LookupError("no record with a name matching '_consolidated' found")
cid = record['cid']

# Open the Zarr store addressed by that CID, reading consolidated metadata.
ds = xr.open_zarr(f'ipfs://{cid}', consolidated=True)

## read_csv.py
import pandas as pd

# IPFS content identifier under which test.csv is addressed
CID = "bafybeidfdcfjgi6rcca7ghqiwjcwaaz7verjgdbepfjonrksle3kc2avui"

# Build the dweb.link URL of the CSV file, then parse it into a DataFrame
csv_url = "https://dweb.link/ipfs/" + CID + "/test.csv"
df = pd.read_csv(csv_url)

## read_json.py
import json
import urllib.request

# IPFS content identifier of the JSON document
cid = "bafkreifacnxyt7p45fat7nirzcqhw45ac4lphyqb5ltyrnpttq3iqm4miu"

# Download the document over HTTPS from dweb.link and decode it as JSON
gateway_url = "https://dweb.link/ipfs/" + cid
with urllib.request.urlopen(gateway_url) as response:
    data = json.load(response)

# Expected contents of `data`:
# [{'name': 'sst.wkmean.1990-present.nc',
#   'cid': 'bafybeie6rzt3d23ytcxyjjmmk2vjx73fk72zeauaotrsecyc37z6x63lxu'},
#  {'name': 'sst.wkmean.1990-present_consolidated.zarr',
#   'cid': 'bafybeiepna7ilkhdwykd65i7aovmrapnmaifullesxg5pdsjy6cos5qfrm'},
#  {'name': 'sst.wkmean.1990-present_unconsolidated.zarr',
#   'cid': 'bafybeiespqdrxcdu4nxjqzsktgsgh2k4voggnfexqcdnjhnxk3xc2xn6he'}]

## read_zarr.py
import xarray as xr

# IPFS content identifier that the Zarr store path below is rooted at
CID = 'bafybeicocwmfbct57lt62klus2adkoq5rlpb6dfpjt7en66vo6s2lf3qmq'

# Compose the ipfs:// location of the consolidated store, then open it
# using its consolidated metadata
zarr_url = f'ipfs://{CID}/sst.wkmean.1990-present_consolidated.zarr'
ds = xr.open_zarr(zarr_url, consolidated=True)

## time_zarr_consolidated.py
%%timeit
# IPython cell magic: everything below, including the import, is the cell
# body that gets timed.
import xarray as xr

# IPFS content identifier that the Zarr store path below is rooted at
CID =  'bafybeicocwmfbct57lt62klus2adkoq5rlpb6dfpjt7en66vo6s2lf3qmq'

# Open the *_consolidated.zarr store over ipfs:// with consolidated=True,
# i.e. asking xarray to read the store's consolidated metadata.
ds = xr.open_zarr(f'ipfs://{CID}/sst.wkmean.1990-present_consolidated.zarr', consolidated=True)

## time_zarr_unconsolidated.py
%%timeit
# IPython cell magic: everything below, including the import, is the cell
# body that gets timed.
import xarray as xr

# IPFS content identifier that the Zarr store path below is rooted at
CID =  'bafybeicocwmfbct57lt62klus2adkoq5rlpb6dfpjt7en66vo6s2lf3qmq'

# Open the *_unconsolidated.zarr store over ipfs:// with consolidated=False,
# i.e. without relying on consolidated metadata.
ds = xr.open_zarr(f'ipfs://{CID}/sst.wkmean.1990-present_unconsolidated.zarr', consolidated=False)
	[
	{
	"name": "sst.wkmean.1990-present.nc",
	"cid": "bafybeie6rzt3d23ytcxyjjmmk2vjx73fk72zeauaotrsecyc37z6x63lxu"
	},
	{
	"name": "sst.wkmean.1990-present_consolidated.zarr",
	"cid": "bafybeiepna7ilkhdwykd65i7aovmrapnmaifullesxg5pdsjy6cos5qfrm"
	},
	{
	"name": "sst.wkmean.1990-present_unconsolidated.zarr",
	"cid": "bafybeiespqdrxcdu4nxjqzsktgsgh2k4voggnfexqcdnjhnxk3xc2xn6he"
	}
	]
	import json
	from urllib.parse import quote_plus

	import pymongo
	import xarray as xr

	# Connect to MongoDB.
	# Placeholder credentials: substitute real values before running.
	USERNAME = "username"
	PASSWORD = "password"
	# Percent-escape the credentials so that characters such as '@', ':' or '/'
	# in a real username/password cannot corrupt the connection URI.
	conn = (
	    f'mongodb+srv://{quote_plus(USERNAME)}:{quote_plus(PASSWORD)}'
	    '@cluster0.ibnvm.mongodb.net/?retryWrites=true&w=majority'
	)
	cluster = pymongo.MongoClient(conn)

	# Select the database named "ipfs" and its collection named "repository".
	db = cluster.ipfs
	coll = db['repository']

	# Load the records from the JSON file.
	with open('./database.json', encoding='utf-8') as f:
	    data = json.load(f)

	# Insert the records into the collection.
	# NOTE(review): every run inserts the records again; nothing in this script
	# guards against duplicates.
	coll.insert_many(data)

	# Query the database for the CID of the first record whose name contains
	# "_consolidated". find_one() fetches a single document instead of
	# materialising every match just to take the first one.
	record = coll.find_one({"name": {'$regex': '_consolidated'}})
	if record is None:
	    raise LookupError("no record with a name matching '_consolidated' found")
	cid = record['cid']

	# Open the Zarr store addressed by that CID, reading consolidated metadata.
	ds = xr.open_zarr(f'ipfs://{cid}', consolidated=True)
	import pandas as pd

	# IPFS content identifier under which test.csv is addressed
	CID = "bafybeidfdcfjgi6rcca7ghqiwjcwaaz7verjgdbepfjonrksle3kc2avui"

	# Build the dweb.link URL of the CSV file, then parse it into a DataFrame
	csv_url = "https://dweb.link/ipfs/" + CID + "/test.csv"
	df = pd.read_csv(csv_url)
	import json
	import urllib.request

	# IPFS content identifier of the JSON document
	cid = "bafkreifacnxyt7p45fat7nirzcqhw45ac4lphyqb5ltyrnpttq3iqm4miu"

	# Download the document over HTTPS from dweb.link and decode it as JSON.
	# The body of the `with` statement must be indented to be valid Python.
	with urllib.request.urlopen(f"https://dweb.link/ipfs/{cid}") as response:
	    data = json.load(response)

	# Expected contents of `data`:
	# [{'name': 'sst.wkmean.1990-present.nc',
	#   'cid': 'bafybeie6rzt3d23ytcxyjjmmk2vjx73fk72zeauaotrsecyc37z6x63lxu'},
	#  {'name': 'sst.wkmean.1990-present_consolidated.zarr',
	#   'cid': 'bafybeiepna7ilkhdwykd65i7aovmrapnmaifullesxg5pdsjy6cos5qfrm'},
	#  {'name': 'sst.wkmean.1990-present_unconsolidated.zarr',
	#   'cid': 'bafybeiespqdrxcdu4nxjqzsktgsgh2k4voggnfexqcdnjhnxk3xc2xn6he'}]
	import xarray as xr

	# IPFS content identifier that the Zarr store path below is rooted at
	CID = 'bafybeicocwmfbct57lt62klus2adkoq5rlpb6dfpjt7en66vo6s2lf3qmq'

	# Compose the ipfs:// location of the consolidated store, then open it
	# using its consolidated metadata
	zarr_url = f'ipfs://{CID}/sst.wkmean.1990-present_consolidated.zarr'
	ds = xr.open_zarr(zarr_url, consolidated=True)
	%%timeit
	# IPython cell magic: everything below, including the import, is the cell
	# body that gets timed.
	import xarray as xr

	# IPFS content identifier that the Zarr store path below is rooted at
	CID = 'bafybeicocwmfbct57lt62klus2adkoq5rlpb6dfpjt7en66vo6s2lf3qmq'

	# Open the *_consolidated.zarr store over ipfs:// with consolidated=True,
	# i.e. asking xarray to read the store's consolidated metadata.
	ds = xr.open_zarr(f'ipfs://{CID}/sst.wkmean.1990-present_consolidated.zarr', consolidated=True)