Skip to content

Instantly share code, notes, and snippets.

@obrl-soil
Last active February 18, 2017 07:51
Show Gist options
  • Save obrl-soil/0c51f99558e2704d77b6786b92df0fd9 to your computer and use it in GitHub Desktop.
Save obrl-soil/0c51f99558e2704d77b6786b92df0fd9 to your computer and use it in GitHub Desktop.
Identify Multipolygons in sp and sf class objects
### sp Polygon objects ###
# x = spatialPolygons or spatialPolygonsDataFrame object only
# by_id = where true, adds a column to @data with a part count for each polygon. Promotes sPolys to sPolydf.
find_mp <- function(x = NULL, id_rows = FALSE) {
nparts <- vector("list", length(x@polygons))
for (i in seq_along(1:length(x@polygons))) {
n <- vector("list", length(x@polygons[[i]]@Polygons))
for (j in seq_along(1:length(x@polygons[[i]]@Polygons))) {
n[[j]] <- x@polygons[[i]]@Polygons[[j]]@hole
}
nparts[[i]] <- sum(unlist(n) == FALSE)
}
PARTS <- unlist(nparts)
if (id_rows == FALSE){
if(sum(PARTS) / nrow(x@data) == 1) {
return(FALSE)
} else {
return(TRUE)
}
} else if(class(x)[[1]] == 'SpatialPolygons') {
x <- as(x, 'SpatialPolygonsDataFrame')
x@data <- cbind(x@data, PARTS)
x@data$dummy <- NULL
} else {
x@data <- cbind(x@data, PARTS)
}
return(x)
}
### sp objects ###
# with thanks to @mdsumner for the fortify-based approach, this should work on point and line objects as well as polys
# x = sp object
# by_id = where true, adds a column to @data with a part count for each polygon. Promotes s* to s*df
find_mp_fort <- function(x = NULL, id_rows = FALSE) {
suppressMessages(xf <- fortify(x))
nparts <- if (is.null(xf$holes)) {
xf %>% distinct(id, group) %>%
group_by(id) %>%
summarize(nparts = n())
} else {
xf %>% distinct(id, group, hole) %>%
filter(hole == FALSE) %>%
group_by(id) %>%
summarise(nparts = n())
}
if (id_rows == FALSE){
if(sum(nparts$nparts) / nrow(nparts) == 1) {
return(FALSE)
} else {
return(TRUE)
}
} else if (class(x)[[1]] == 'SpatialPolygons') {
x <- as(x, 'SpatialPolygonsDataFrame')
x@data <- merge(x@data, nparts, by.x = 0, by.y = 'id',
all.x = TRUE, all.y = FALSE)
x@data$dummy <- NULL
} else if (class(x)[[1]] == 'SpatialLines') {
x <- as(x, 'SpatialLinesDataFrame')
x@data <- merge(x@data, nparts, by.x = 0, by.y = 'id',
all.x = TRUE, all.y = FALSE)
x@data$dummy <- NULL
} else if (class(x)[[1]] == 'SpatialPoints') {
x <- as(x, 'SpatialPointsDataFrame')
x@data <- merge(x@data, nparts, by.x = 0, by.y = 'id',
all.x = TRUE, all.y = FALSE)
x@data$dummy <- NULL
} else {
x@data <- merge(x@data, nparts, by.x = 0, by.y = 'id',
all.x = TRUE, all.y = FALSE)
}
x@data$Row.names <- NULL
return(x)
}
### sf objects ###
# x = sf object
# by_id = where true, adds a column reporting the geometry type for each row
# note that you must use st_read() with option promote_to_multi = FALSE if you want a sensible output where by_id = TRUE
st_findmp <- function(x = NULL, id_rows = FALSE) {
nparts_vec <- st_geometry_type(x$geometry)
if (id_rows == FALSE){
return(as.character(unique(st_geometry_type(x))))
} else {
x$nparts <- nparts_vec
return(x)
}
}
@mdsumner
Copy link

mdsumner commented Feb 18, 2017

A more formal treatment would go something like this:

count_paths <- function(x, ...) UseMethod("count_paths")
count_paths.MULTIPOLYGON <- function(x, ...) sum(unlist(lapply(x, length)))
count_paths.POLYGON <- function(x, ...) length(x)
count_paths.LINESTRING <- function(x, ...) 1L
count_paths.MULTILINESTRING <- function(x, ...) length(x)
count_paths.MULTIPOINT <- function(x, ...) nrow(x)
count_paths.POINT <- function(x, ...) 1L

count_paths.sfc <- function(x, ...)  unlist(lapply(x, count_paths))
count_paths.sf <- function(x, ...) count_paths(st_geometry(x))

library(sf)
example(st_read)
count_paths(nc)
library(maptools)
data(wrld_simpl)
count_paths(st_as_sf(wrld_simpl))

We can even pipeline this

library(dplyr)
> st_as_sf(wrld_simpl) %>% mutate(np = count_paths(geometry)) %>% arrange(desc(np)) %>%  filter(np > 200) %>% select(NAME,AREA)
Simple feature collection with 4 features and 2 fields
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: -180 ymin: -90 xmax: 180 ymax: 83.57027
epsg (SRID):    4326
proj4string:    +proj=longlat +datum=WGS84 +no_defs
           NAME    AREA                       geometry
1        Canada  909351 MULTIPOLYGON(((-134.4955444...
2     Indonesia  181157 MULTIPOLYGON(((116.04942512...
3 United States  915896 MULTIPOLYGON(((-95.07806396...
4        Russia 1638094 MULTIPOLYGON(((104.26748847...

I'm using "path" to mean "any distinct part, like a linestring, island, hole, or point" - and treating a coordinate as a kind of degenerate go-nowhere "path". It makes sense in a more general context (trust me). Manifold GIS calls this a "branch", but there's no other consistent name anywhere afaik.

(thanks for the inspiration, feel free to agitate on details)

@obrl-soil
Copy link
Author

Awesome, thanks! My dplyr-fu is weak as yet. Fortify is a bit slower but does allow a function that'll work on point and line sp objects, see edits above

for sf, holes are matrices in the geometry list, and parts are sub-lists holding a matrix e.g.

x <- as.list(sf[i, 'geometry]) # what's that warning about anyway...
y <- sapply(x$geometry[[1]], function(x) class(x))

should make it easy to distinguish the two?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment