Skip to content

Instantly share code, notes, and snippets.

@EHJ-52n
Last active April 30, 2021 10:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save EHJ-52n/c7e8fd51332e11b45aa25a9691b7f888 to your computer and use it in GitHub Desktop.
open data cube: understanding coordinate handling
import contextlib
import os
import subprocess
import sys
import time

import datacube
import rioxarray
import xarray
# Connection settings for the local Open Data Cube PostgreSQL database.
# Adjust these to match your own set-up before running the script.
db_host = 'localhost'
# Database name, role and password all use the same default here.
db_name = db_user = db_pw = 'opendatacube'
# Build a tiny 2x3 "wave height" grid on a lat/lon graticule and tag it
# with a geographic CRS so the resulting netCDF file is georeferenced.
height = xarray.DataArray(
    [[1., 2., 3.], [4., 5., 6.]],
    dims=("latitude", "longitude"),
    coords={"latitude": [1., 2.], "longitude": [3., 4., 5.]},
)
# Create dataset with wave height:
ds = xarray.Dataset({"height": height})
ds.rio.write_crs(4326, inplace=True)  # EPSG:4326 = WGS 84 lat/lon
# Save dataset to netCDF file so the data cube can index it from disk:
netcdf_filename = 'dim-example.nc'
ds.to_netcdf(path=netcdf_filename, mode="w")
# Read it back fully into memory and close the file handle right away.
# open_dataset() is lazy and keeps the file open, which would make the
# os.remove() clean-up at the end of the script fail on some platforms.
with xarray.open_dataset(netcdf_filename) as _nc:
    ds_disk = _nc.load()
# Write the minimal ODC product (type) definition to disk.  Using a
# context manager guarantees the file is flushed and closed before the
# ``datacube product add`` call below reads it.  Note the nesting of the
# ``metadata``/``storage``/``measurements`` sections, which the ODC
# product schema requires.
product_yaml = 'dim-example-product.yaml'
with open(product_yaml, 'w') as product:
    product.write("""name: dim_example
description: minimal example eo
metadata_type: eo
metadata:
    format: {name: NETCDF}
    instrument: {name: NA}
    platform: {code: NA}
    product_type: dim_example
storage:
    crs: EPSG:4326
    resolution:
        longitude: 1.0
        latitude: 1.0
measurements:
    - name: height
      units: m
      dtype: float64
      nodata: -1.23
""")
# Write the ODC dataset document describing the netCDF file to disk.
# The geographic extent and the projected geo_ref_points describe the
# same 2x3 degree footprint as the DataArray coordinates above.  As with
# the product document, proper YAML nesting is required for ``datacube
# dataset add`` to parse it.
dataset_yaml = 'dim-example-dataset.yaml'
with open(dataset_yaml, 'w') as dataset:
    dataset.write("""creation_dt: '1970-01-01T00:00:00Z'
extent:
    center_dt: '1970-01-01T00:00:00Z'
    coord:
        ll: {lon: 3, lat: 1}
        lr: {lon: 5, lat: 1}
        ul: {lon: 3, lat: 2}
        ur: {lon: 5, lat: 2}
    from_dt: '1970-01-01T00:00:00Z'
    to_dt: '1970-01-01T00:00:00Z'
format: {name: NETCDF}
grid_spatial:
    projection:
        geo_ref_points:
            ll: {x: 3, y: 1}
            lr: {x: 5, y: 1}
            ul: {x: 3, y: 2}
            ur: {x: 5, y: 2}
        spatial_reference: EPSG:4326
id: 272302c9-1449-4a33-8166-4b6083a8a801
image:
    bands:
        height: {path: dim-example.nc, layer: height}
instrument: {name: NA}
lineage:
    source_datasets: {}
product_type: dim_example
platform: {code: NA}
""")
# Write a minimal datacube configuration file pointing at the database
# configured above.  Every ``datacube`` CLI invocation below selects it
# explicitly via ``--config datacube.conf``.  The context manager makes
# sure the file is closed before the first CLI call reads it.
datacube_conf = 'datacube.conf'
with open(datacube_conf, 'w') as odc_config:
    odc_config.write("""[default]
db_database: {0}
db_hostname: {1}
db_username: {2}
db_password: {3}
index_driver: default
""".format(db_name, db_host, db_user, db_pw))
# Verify the datacube database is reachable and its schema initialised.
# ``system check`` exits non-zero both when the connection fails and
# when the schema is merely missing, so the output is inspected to tell
# the two cases apart.  subprocess.run replaces the Popen/communicate
# boilerplate of the original.
cmd = subprocess.run("datacube --config datacube.conf system check",
                     stdout=subprocess.PIPE, shell=True)
output = cmd.stdout.decode()
if cmd.returncode != 0:
    # database is not configured properly or not accessible
    if "Valid connection" in output and "Database not initialised" in output:
        # Connection works but the schema is missing -> create it.
        cmd = subprocess.run("datacube --config datacube.conf system init",
                             stdout=subprocess.PIPE, shell=True)
        output = cmd.stdout.decode()
        if cmd.returncode != 0:
            # The original ignored an init failure; abort explicitly.
            print("Could not initialise database. Please check output")
            print(output)
            sys.exit(1)
    else:
        print(
            "Any other error happened. Please check error output:\n{0}".format(output))
        sys.exit(1)
# Register the product definition unless the index already knows it.
# ``product list`` output is searched for the product name; only when it
# is absent (or the listing itself fails) is ``product add`` attempted.
cmd = subprocess.run('datacube --config datacube.conf product list',
                     stdout=subprocess.PIPE, shell=True)
output = cmd.stdout.decode()
if cmd.returncode != 0 or 'dim_example' not in output:
    cmd = subprocess.run('datacube --config datacube.conf product add dim-example-product.yaml',
                         stdout=subprocess.PIPE, shell=True)
    output = cmd.stdout.decode()
    if cmd.returncode != 0:
        print("Could not register product. Please check output")
        print(output)
        sys.exit(1)
    # Give the index a moment to settle before the dataset is added.
    time.sleep(1)
# Index the example dataset unless a dataset of this product is already
# present.  Mirrors the product-registration logic above.
cmd = subprocess.run('datacube --config datacube.conf dataset search product=dim_example',
                     stdout=subprocess.PIPE, shell=True)
output = cmd.stdout.decode()
if cmd.returncode != 0 or 'dim_example' not in output:
    cmd = subprocess.run('datacube --config datacube.conf dataset add --product dim_example dim-example-dataset.yaml',
                         stdout=subprocess.PIPE, shell=True)
    output = cmd.stdout.decode()
    if cmd.returncode != 0:
        print("Could not register dataset. Please check output")
        print(output)
        sys.exit(1)
# Open the datacube against the same configuration file the CLI calls
# above used.  A bare ``datacube.Datacube()`` would fall back to the
# user's global configuration and could hit a different database.
dc = datacube.Datacube(config='datacube.conf')
# Print information to console
print("""
Input data
-------------------------------------------------------------------------------
""")
print(ds_disk)
print("""
Additional information
height values:""")
print(ds_disk.height.values)
print('spatial_ref: {0}'.format(ds_disk.height.rio.crs))
print("""
Output data
-------------------------------------------------------------------------------
""")
# Load dataset
print("""query1 = {\"latitude\": (0, 2), \"longitude\": (2, 5)}
print(dc.load('dim_example', **query1))
""")
query1 = {"latitude": (0, 2), "longitude": (2, 5)}
# Load once and reuse the result below; the original issued the same
# query twice.
data1 = dc.load('dim_example', **query1)
print(data1)
print("""
Loading without query parameter != load everything
-------------------------------------------------------------------------------""")
print("dc.load('dim_example')")
print(dc.load('dim_example'))
print("""
Different query parameters result in same values but different coordinates
-------------------------------------------------------------------------------""")
print("""Query 1 : {0}
Coords 1: {1}
Values 1: {2}
""".format(query1, data1.coords, data1.height.values))
query2 = {"latitude": (1, 3), "longitude": (3, 6)}
data2 = dc.load('dim_example', **query2)
print("""Query 2 : {0}
Coords 2: {1}
Values 2: {2}
""".format(query2, data2.coords, data2.height.values))
dc.close()
# Remove the example dataset and product from the data cube again via
# the bundled SQL script.  The password is handed to psql through the
# environment and the command is an argument list (no shell), so the
# credential never appears in the process list and nothing needs shell
# quoting.
cmd = subprocess.run(
    ['psql',
     '-v', 'product_name=dim_example',
     '-f', 'delete_odc_product.sql',
     '-h', db_host,
     '-U', db_user,
     db_name],
    stdout=subprocess.PIPE,
    env=dict(os.environ, PGPASSWORD=db_pw))
output = cmd.stdout.decode()
if cmd.returncode != 0:
    print("Could not remove dataset and product type from datacube. Please check output")
    print(output)
    sys.exit(1)
# Best-effort removal of every file this script created.  A tuple gives
# a deterministic order (the original iterated a set), and a file that
# is already gone -- e.g. after a partial earlier failure -- is not an
# error.  Requires ``import contextlib`` at the top of the file.
for file in (netcdf_filename, product_yaml, dataset_yaml, datacube_conf):
    with contextlib.suppress(FileNotFoundError):
        os.remove(file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment