Skip to content

Instantly share code, notes, and snippets.

@scottyhq
Created August 28, 2019 04:39
Show Gist options
  • Save scottyhq/8222b99c3400209f96826d09389482c1 to your computer and use it in GitHub Desktop.
Save scottyhq/8222b99c3400209f96826d09389482c1 to your computer and use it in GitHub Desktop.
Trying out tiledb with python, gdal, xarray
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test out TileDB\n",
"https://docs.tiledb.io/en/stable/quickstart.html#a-simple-dense-array-example"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(1, 6, 2)\n"
]
}
],
"source": [
"import numpy as np\n",
"import sys\n",
"import tiledb\n",
"\n",
"print(tiledb.libtiledb.version())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Array"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Name of the array to create.\n",
"array_name = \"quickstart_dense\"\n",
"\n",
"def create_array():\n",
"    \"\"\"Create the dense 4x4 array at ``array_name`` unless it already exists.\n",
"\n",
"    The array has two int32 dimensions, \"rows\" and \"cols\", each with domain\n",
"    [1, 4] and a tile extent of 4, plus a single int32 attribute \"a\".\n",
"    If an array is already present at ``array_name`` a message is printed and\n",
"    nothing is created. The default TileDB context is used.\n",
"    \"\"\"\n",
"    # Guard clause: leave an existing array untouched.\n",
"    already_there = tiledb.object_type(array_name) == \"array\"\n",
"    if already_there:\n",
"        print(\"Array already exists.\")\n",
"        return\n",
"\n",
"    # Both axes share the same settings, so build them from one spec.\n",
"    axes = [\n",
"        tiledb.Dim(name=axis, domain=(1, 4), tile=4, dtype=np.int32)\n",
"        for axis in (\"rows\", \"cols\")\n",
"    ]\n",
"    cell_attr = tiledb.Attr(name=\"a\", dtype=np.int32)\n",
"\n",
"    # Dense schema: every (row, col) cell stores one integer in \"a\".\n",
"    layout = tiledb.ArraySchema(\n",
"        domain=tiledb.Domain(*axes),\n",
"        sparse=False,\n",
"        attrs=[cell_attr],\n",
"    )\n",
"\n",
"    # Create the (empty) array on disk.\n",
"    tiledb.DenseArray.create(array_name, layout)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Array already exists.\n"
]
}
],
"source": [
"create_array()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Write array"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def write_array():\n",
"    \"\"\"Fill the whole array at ``array_name`` with the integers 1 through 16.\n",
"\n",
"    Values are laid out row by row in a 4x4 grid (first row 1-4, last row\n",
"    13-16). The default TileDB context is used.\n",
"    \"\"\"\n",
"    # Same 4x4 grid as spelling out the four rows by hand.\n",
"    grid = np.arange(1, 17).reshape(4, 4)\n",
"\n",
"    # Open the array in write mode and assign every cell in one go.\n",
"    with tiledb.DenseArray(array_name, mode='w') as dense:\n",
"        dense[:] = grid"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"write_array()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Read array"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def read_array():\n",
"    \"\"\"Print attribute \"a\" for rows 1-2 and columns 2-4 of the array at ``array_name``.\n",
"\n",
"    The default TileDB context is used.\n",
"    \"\"\"\n",
"    with tiledb.DenseArray(array_name, mode='r') as dense:\n",
"        # Slice end points are exclusive: 1:3 -> rows 1, 2 and 2:5 -> cols 2, 3, 4.\n",
"        window = dense[1:3, 2:5]\n",
"        # The slice result is looked up by attribute name.\n",
"        values = window[\"a\"]\n",
"        print(values)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[2 3 4]\n",
" [6 7 8]]\n"
]
}
],
"source": [
"read_array()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use GDAL driver"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"GDAL 3.0.1, released 2019/06/28\n"
]
}
],
"source": [
"!gdalinfo --version"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"rest.server_serialization_format CAPNP\n",
"sm.check_coord_dups true\n",
"sm.check_coord_oob true\n",
"sm.check_global_order true\n",
"sm.consolidation.amplification 1\n",
"sm.consolidation.buffer_size 50000000\n",
"sm.consolidation.step_max_frags 4294967295\n",
"sm.consolidation.step_min_frags 4294967295\n",
"sm.consolidation.step_size_ratio 0\n",
"sm.consolidation.steps 4294967295\n",
"sm.dedup_coords false\n",
"sm.enable_signal_handlers true\n",
"sm.memory_budget 5368709120\n",
"sm.memory_budget_var 10737418240\n",
"sm.num_async_threads 1\n",
"sm.num_reader_threads 1\n",
"sm.num_tbb_threads -1\n",
"sm.num_writer_threads 1\n",
"sm.tile_cache_size 10000000\n",
"vfs.file.enable_filelocks true\n",
"vfs.file.max_parallel_ops 8\n",
"vfs.min_batch_gap 512000\n",
"vfs.min_batch_size 20971520\n",
"vfs.min_parallel_size 10485760\n",
"vfs.num_threads 8\n",
"vfs.s3.connect_max_tries 5\n",
"vfs.s3.connect_scale_factor 25\n",
"vfs.s3.connect_timeout_ms 3000\n",
"vfs.s3.max_parallel_ops 8\n",
"vfs.s3.multipart_part_size 5242880\n",
"vfs.s3.proxy_port 0\n",
"vfs.s3.proxy_scheme https\n",
"vfs.s3.region us-east-1\n",
"vfs.s3.request_timeout_ms 3000\n",
"vfs.s3.scheme https\n",
"vfs.s3.use_multipart_upload true\n",
"vfs.s3.use_virtual_addressing true\n"
]
}
],
"source": [
"# Save TileDB's default configuration to tiledb.config and print the file\n",
"config = tiledb.Config()\n",
"config.save(\"tiledb.config\")\n",
"!cat tiledb.config"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Parameter | Value \n",
"-------------------------------- | -------------\n",
"rest.server_serialization_format | 'CAPNP' \n",
"sm.check_coord_dups | 'true' \n",
"sm.check_coord_oob | 'true' \n",
"sm.check_global_order | 'true' \n",
"sm.consolidation.amplification | '1' \n",
"sm.consolidation.buffer_size | '50000000' \n",
"sm.consolidation.step_max_frags | '4294967295' \n",
"sm.consolidation.step_min_frags | '4294967295' \n",
"sm.consolidation.step_size_ratio | '0' \n",
"sm.consolidation.steps | '4294967295' \n",
"sm.dedup_coords | 'false' \n",
"sm.enable_signal_handlers | 'true' \n",
"sm.memory_budget | '5368709120' \n",
"sm.memory_budget_var | '10737418240'\n",
"sm.num_async_threads | '1' \n",
"sm.num_reader_threads | '1' \n",
"sm.num_tbb_threads | '-1' \n",
"sm.num_writer_threads | '1' \n",
"sm.tile_cache_size | '10000000' \n",
"vfs.file.enable_filelocks | 'true' \n",
"vfs.file.max_parallel_ops | '8' \n",
"vfs.hdfs.kerb_ticket_cache_path | '' \n",
"vfs.hdfs.name_node_uri | '' \n",
"vfs.hdfs.username | '' \n",
"vfs.min_batch_gap | '512000' \n",
"vfs.min_batch_size | '20971520' \n",
"vfs.min_parallel_size | '10485760' \n",
"vfs.num_threads | '8' \n",
"vfs.s3.aws_access_key_id | '' \n",
"vfs.s3.aws_secret_access_key | '' \n",
"vfs.s3.connect_max_tries | '5' \n",
"vfs.s3.connect_scale_factor | '25' \n",
"vfs.s3.connect_timeout_ms | '3000' \n",
"vfs.s3.endpoint_override | '' \n",
"vfs.s3.max_parallel_ops | '8' \n",
"vfs.s3.multipart_part_size | '5242880' \n",
"vfs.s3.proxy_host | '' \n",
"vfs.s3.proxy_password | '' \n",
"vfs.s3.proxy_port | '0' \n",
"vfs.s3.proxy_scheme | 'https' \n",
"vfs.s3.proxy_username | '' \n",
"vfs.s3.region | 'us-east-1' \n",
"vfs.s3.request_timeout_ms | '3000' \n",
"vfs.s3.scheme | 'https' \n",
"vfs.s3.use_multipart_upload | 'true' \n",
"vfs.s3.use_virtual_addressing | 'true' \n"
]
}
],
"source": [
"# Load from file\n",
"config_load = tiledb.Config.load(\"tiledb.config\")\n",
"print(config_load)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Name Value Type Location\n",
" ---- ----- ---- --------\n",
" profile guest-s3 manual --profile\n",
"access_key ****************C2HA shared-credentials-file \n",
"secret_key ****************fXt2 shared-credentials-file \n",
" region <not set> None None\n"
]
}
],
"source": [
"!aws --profile guest-s3 configure list"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" PRE DeepBlue-SeaWiFS-1.0_L3_20100101_v004-20130604T131317Z.tiledb/\n"
]
}
],
"source": [
"# Uses the guest-s3 AWS profile (keys come from the shared credentials file, not the EC2 instance role)\n",
"!aws --profile guest-s3 s3 ls s3://pangeo-data-upload-virginia/gdal3-test/DeepBlue-SeaWiFS-1.0_L3_20100101_v004-20130604T131317Z.tiledb"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Driver: TileDB/TileDB\n",
"Files: DeepBlue-SeaWiFS-1.0_L3_20100101_v004-20130604T131317Z.tiledb/DeepBlue-SeaWiFS-1.0_L3_20100101_v004-20130604T131317Z.tdb\n",
" DeepBlue-SeaWiFS-1.0_L3_20100101_v004-20130604T131317Z.tiledb\n",
" DeepBlue-SeaWiFS-1.0_L3_20100101_v004-20130604T131317Z.tiledb/DeepBlue-SeaWiFS-1.0_L3_20100101_v004-20130604T131317Z.tdb.aux.xml\n",
"Size is 512, 512\n",
"Metadata:\n",
" solar_zenith_angle_long_name=solar zenith angle\n",
" solar_zenith_angle_standard_name=solar_zenith_angle\n",
" solar_zenith_angle_units=degrees\n",
" solar_zenith_angle_valid_range=0 90 \n",
" solar_zenith_angle__FillValue=-999 \n",
" viewing_zenith_angle_long_name=viewing zenith angle\n",
" viewing_zenith_angle_units=degrees\n",
" viewing_zenith_angle_valid_range=0 90 \n",
" viewing_zenith_angle__FillValue=-999 \n",
"Subdatasets:\n",
" SUBDATASET_1_NAME=TILEDB:\"DeepBlue-SeaWiFS-1.0_L3_20100101_v004-20130604T131317Z.tiledb\":solar_zenith_angle\n",
" SUBDATASET_1_DESC=[1x360x180] solar_zenith_angle (Float32)\n",
" SUBDATASET_2_NAME=TILEDB:\"DeepBlue-SeaWiFS-1.0_L3_20100101_v004-20130604T131317Z.tiledb\":viewing_zenith_angle\n",
" SUBDATASET_2_DESC=[1x360x180] viewing_zenith_angle (Float32)\n",
"Corner Coordinates:\n",
"Upper Left ( 0.0, 0.0)\n",
"Lower Left ( 0.0, 512.0)\n",
"Upper Right ( 512.0, 0.0)\n",
"Lower Right ( 512.0, 512.0)\n",
"Center ( 256.0, 256.0)\n"
]
}
],
"source": [
"!gdalinfo DeepBlue-SeaWiFS-1.0_L3_20100101_v004-20130604T131317Z.tiledb"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ERROR 4: `DeepBlue-SeaWiFS-1.0_L3_20100101_v004-20130604T131317Z.tiledb' not recognized as a supported file format.\n",
"gdalinfo failed - unable to open 'DeepBlue-SeaWiFS-1.0_L3_20100101_v004-20130604T131317Z.tiledb'.\n"
]
}
],
"source": [
"!CPL_DEBUG=ON gdalinfo -oo TILEDB_CONFIG=tiledb.config DeepBlue-SeaWiFS-1.0_L3_20100101_v004-20130604T131317Z.tiledb"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:gdal3]",
"language": "python",
"name": "conda-env-gdal3-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment