library(duckdbfs)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Open the remote GeoParquet file through duckdbfs. `db` is a lazy table
# reference (it prints as "Source: SQL"), not an in-memory data frame.
db <- duckdbfs::open_dataset(
  "https://data.source.coop/cholmes/eurocrops/unprojected/geoparquet/FR_2018_EC21.parquet"
)
## how many records
dplyr::tally(db)
#> # Source: SQL [1 x 1]
#> # Database: DuckDB v0.9.2 [unknown@Linux 5.15.0-102-generic:R 4.3.3/:memory:]
#> n
#> <dbl>
#> 1 9517874
## lazy queries: printing the result only previews the first rows
db |>
  dplyr::filter(CODE_CULTU == "PPH" & SURF_PARC > 130)
#> # Source: SQL [?? x 11]
#> # Database: DuckDB v0.9.2 [unknown@Linux 5.15.0-102-generic:R 4.3.3/:memory:]
#> ID_PARCEL SURF_PARC CODE_CULTU CODE_GROUP CULTURE_D1 CULTURE_D2 EC_org_n
#> <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 1095429 164. PPH 18 <NA> <NA> Prairie perm…
#> 2 1078256 422. PPH 18 <NA> <NA> Prairie perm…
#> 3 1095702 140. PPH 18 <NA> <NA> Prairie perm…
#> 4 1075553 190. PPH 18 <NA> <NA> Prairie perm…
#> 5 1091734 164. PPH 18 <NA> <NA> Prairie perm…
#> 6 1079456 251. PPH 18 <NA> <NA> Prairie perm…
#> 7 1075272 184. PPH 18 <NA> <NA> Prairie perm…
#> 8 1095343 143. PPH 18 <NA> <NA> Prairie perm…
#> 9 8643052 186. PPH 18 <NA> <NA> Prairie perm…
#> 10 1071276 489. PPH 18 <NA> <NA> Prairie perm…
#> # ℹ more rows
#> # ℹ 4 more variables: EC_trans_n <chr>, EC_hcat_n <chr>, EC_hcat_c <chr>,
#> # geometry <list>
## now collect: run the query and bring the two needed columns into R
dat <- db |>
  dplyr::filter(CODE_CULTU == "PPH" & SURF_PARC > 130) |>
  dplyr::select(SURF_PARC, geometry) |>
  dplyr::collect()
## get polygons etc
library(wk)
# Wrap the collected geometry column with wkb() and show the first four
# geometries.
wkb(dat[["geometry"]])[seq_len(4L)]
#> <wk_wkb[4]>
#> [1] <MULTIPOLYGON (((4.401467 43.4592, 4.40146 43.45919, 4.397464 43.45851, 4.397699 43.45777, 4.39797 43.45724, 4.398791 43.45596...>
#> [2] <MULTIPOLYGON (((4.440002 43.54249, 4.43755 43.54185, 4.433787 43.54085, 4.43234 43.54046, 4.432329 43.54039, 4.433091 43.53927...>
#> [3] <MULTIPOLYGON (((4.480481 43.62814, 4.474601 43.62948, 4.475438 43.62348, 4.469131 43.61992, 4.469244 43.61944, 4.472691 43.62128...>
#> [4] <MULTIPOLYGON (((4.634756 43.49425, 4.634719 43.49428, 4.641771 43.49608, 4.641739 43.49594, 4.641771 43.49608, 4.641907 43.49666...>
# Draw the collected geometries, with one colour per parcel derived from
# its SURF_PARC value.
plot(
  wkb(dat[["geometry"]]),
  col = colourvalues::colour_values(dat[["SURF_PARC"]])
)
# Overlay map outlines on the existing plot; "world" is the default
# database of maps::map(), spelled out here for clarity.
maps::map(database = "world", add = TRUE)
# Created on 2024-04-17 with reprex v2.0.2