Create a dense matrix of random integers.
library(tiledb)
# Fix the RNG seed so the matrix printed below is reproducible.
set.seed(123)
nr <- 50
nc <- 8
# Fill an nr x nc matrix (column by column) with integers drawn uniformly,
# with replacement, from 1..nr.
m <- matrix(
  sample.int(nr, size = nr * nc, replace = TRUE),
  nrow = nr,
  ncol = nc
)
m
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 31 13 23 26 10 30 6 3
## [2,] 15 18 15 9 25 31 42 18
## [3,] 14 33 21 7 8 29 42 32
## [4,] 3 27 37 34 18 17 38 17
## [5,] 42 25 8 48 9 37 48 38
## [6,] 50 38 10 13 7 20 16 42
## [7,] 43 21 50 19 7 35 35 40
## [8,] 37 15 42 47 10 25 36 42
## [9,] 14 41 44 39 24 46 27 32
## [10,] 25 47 34 4 23 33 27 26
## [11,] 26 26 10 1 26 2 35 13
## [12,] 27 31 22 40 43 4 23 20
## [13,] 5 16 12 30 33 10 3 38
## [14,] 27 30 20 30 29 33 31 20
## [15,] 28 6 46 25 10 5 48 29
## [16,] 9 43 17 16 13 25 2 20
## [17,] 29 8 46 24 43 8 47 30
## [18,] 35 22 35 11 11 25 23 41
## [19,] 8 22 40 48 25 21 3 48
## [20,] 26 39 46 20 26 45 9 8
## [21,] 7 31 30 40 7 18 39 39
## [22,] 42 48 15 3 25 42 30 10
## [23,] 9 17 24 29 23 31 31 17
## [24,] 19 50 49 36 26 6 30 18
## [25,] 36 49 23 44 32 7 9 40
## [26,] 14 34 43 22 20 41 36 42
## [27,] 17 4 7 49 24 48 34 11
## [28,] 43 13 29 42 9 17 22 49
## [29,] 39 5 15 20 41 45 44 44
## [30,] 12 25 23 11 37 28 41 2
## [31,] 15 22 26 8 23 40 28 19
## [32,] 32 25 38 46 14 7 29 13
## [33,] 42 32 46 21 46 20 43 13
## [34,] 45 46 32 45 6 18 39 31
## [35,] 7 25 7 2 27 29 45 30
## [36,] 9 23 27 43 1 17 37 22
## [37,] 41 35 42 13 26 33 43 30
## [38,] 10 40 5 46 42 2 33 10
## [39,] 23 48 6 6 49 49 19 16
## [40,] 27 30 16 8 17 2 12 5
## [41,] 7 12 24 44 29 13 34 36
## [42,] 27 31 32 32 26 24 21 50
## [43,] 32 46 21 36 27 49 17 33
## [44,] 38 30 11 45 21 3 12 16
## [45,] 25 35 36 14 7 18 30 28
## [46,] 34 14 44 16 26 2 35 49
## [47,] 29 29 46 23 41 37 30 12
## [48,] 5 32 19 33 20 12 25 28
## [49,] 8 7 25 40 6 13 50 2
## [50,] 12 3 39 40 50 42 47 50
Ingest into a dense TileDB array:
# Target location for the array inside the session's temporary directory.
uri <- file.path(tempdir(), "dense-array")
# Start from a clean slate: drop whatever directory is already at that path.
if (dir.exists(uri)) {
  unlink(uri, recursive = TRUE)
}
# Write the in-memory matrix out as a TileDB array.
tiledb::fromMatrix(obj = m, uri = uri)
# Print the schema of the array that was just written.
tiledb::schema(uri)
## tiledb_array_schema(
## domain=tiledb_domain(c(tiledb_dim(name="rows", domain=c(1L,50L), tile=50L, type="INT32"), tiledb_dim(name="cols", domain=c(1L,8L), tile=8L, type="INT32"))),
## attrs=c(tiledb_attr(name="x", type="INT32", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))),
## cell_order="COL_MAJOR", tile_order="COL_MAJOR", capacity=10000, sparse=FALSE, allows_dups=FALSE,
## coords_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
## offsets_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
## validity_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("RLE"),"COMPRESSION_LEVEL",-1)))
## )
Slicing either dimension by a range produces a matrix identical to what you would get from R:
# Reference slice taken from the in-memory matrix: rows 1-10, columns 1-5.
m1 <- m[1:10, 1:5]
m1
## [,1] [,2] [,3] [,4] [,5]
## [1,] 31 13 23 26 10
## [2,] 15 18 15 9 25
## [3,] 14 33 21 7 8
## [4,] 3 27 37 34 18
## [5,] 42 25 8 48 9
## [6,] 50 38 10 13 7
## [7,] 43 21 50 19 7
## [8,] 37 15 42 47 10
## [9,] 14 41 44 39 24
## [10,] 25 47 34 4 23
This is true whether indexing via `[`-notation:
# Slice rows 1-10 and columns 1-5 with `[`; fetches a 10x5 matrix as expected.
# (The stray trailing comma after the last argument has been removed: it
# passed an extra, empty argument to tiledb_array().)
r1a <- tiledb_array(
  uri = uri,
  return_as = "matrix"
)[1:10, 1:5]
r1a
## [,1] [,2] [,3] [,4] [,5]
## [1,] 31 13 23 26 10
## [2,] 15 18 15 9 25
## [3,] 14 33 21 7 8
## [4,] 3 27 37 34 18
## [5,] 42 25 8 48 9
## [6,] 50 38 10 13 7
## [7,] 43 21 50 19 7
## [8,] 37 15 42 47 10
## [9,] 14 41 44 39 24
## [10,] 25 47 34 4 23
## attr(,"query_status")
## [1] "COMPLETE"
# Element-wise check that the TileDB slice equals the base R slice.
all(m1 == r1a)
## [1] TRUE
Or using the `selected_ranges` argument:
# `selected_ranges` takes one two-column matrix per dimension, where each row
# is a (start, end) pair. A single row per dimension therefore describes the
# whole slice: rows 1-10 and columns 1-5.
r1b <- tiledb_array(
  uri = uri,
  return_as = "matrix",
  selected_ranges = list(cbind(1L, 10L), cbind(1L, 5L))
)[]
r1b
## [,1] [,2] [,3] [,4] [,5]
## [1,] 31 13 23 26 10
## [2,] 15 18 15 9 25
## [3,] 14 33 21 7 8
## [4,] 3 27 37 34 18
## [5,] 42 25 8 48 9
## [6,] 50 38 10 13 7
## [7,] 43 21 50 19 7
## [8,] 37 15 42 47 10
## [9,] 14 41 44 39 24
## [10,] 25 47 34 4 23
## attr(,"query_status")
## [1] "COMPLETE"
# Element-wise check that the range-selected result equals the base R slice.
all(m1 == r1b)
## [1] TRUE
Expected output for point queries.
# Reference result from the in-memory matrix: the four cells where rows 3 and
# 5 meet columns 3 and 5.
m2 <- m[c(3, 5), c(3, 5)]
m2
## [,1] [,2]
## [1,] 21 8
## [2,] 8 9
Point queries performed with `[`-notation don't produce the expected output. Instead, they return the sub-matrix spanning the minimum to the maximum of the selected points in each dimension:
# With `[`, the index vectors c(3, 5) do not pick individual cells: the
# result is the 3x3 block equivalent to m[3:5, 3:5].
r2a <- tiledb_array(uri = uri, return_as = "matrix")[c(3, 5), c(3, 5)]
r2a
## [,1] [,2] [,3]
## [1,] 21 7 8
## [2,] 37 34 18
## [3,] 8 48 9
## attr(,"query_status")
## [1] "COMPLETE"
# all(m2 == r2a)
# ERROR: Non-conformable arrays
Point queries performed with `selected_points` do produce the expected output:
# Supply the wanted indices per dimension through `selected_points`, then read
# with an empty `[]` so only that selection is applied.
r2b <- tiledb_array(
  uri = uri,
  selected_points = list(c(3, 5), c(3, 5)),
  return_as = "matrix"
)[]
r2b
## [,1] [,2]
## [1,] 21 8
## [2,] 8 9
## attr(,"query_status")
## [1] "COMPLETE"
# Element-wise check that the point-selected result equals the base R result.
all(m2 == r2b)
## [1] TRUE
Attribute filters result in NAs for values that don't meet the filter criteria:
# Same point selection as before, with an added condition on attribute `x`
# (x > 13).
r3b <- tiledb_array(
  uri = uri,
  selected_points = list(c(3, 5), c(3, 5)),
  query_condition = parse_query_condition(x > 13),
  return_as = "matrix"
)[]
r3b
## [,1] [,2]
## [1,] 21 NA
## [2,] NA NA
## attr(,"query_status")
## [1] "COMPLETE"