Skip to content

Instantly share code, notes, and snippets.

@obrl-soil
Created April 3, 2019 12:45
Show Gist options
  • Save obrl-soil/002659f46932602985c7de52f9b8c21c to your computer and use it in GitHub Desktop.
Save obrl-soil/002659f46932602985c7de52f9b8c21c to your computer and use it in GitHub Desktop.
# faffing about with methods to approximate a "lag vertex polygon" (Poore, 1986)
# or "best-fit regular geometric feature" (Stoddard, 1965) for polygon shape
# metric
# References:
# Poore 1986 - MSc Thesis on soil map quantification at
# https://ir.library.oregonstate.edu/downloads/zk51vk04q
# Stoddart, D. (1965). The shape of atolls. Marine Geology, 3(5), 369–383.
# doi:10.1016/0025-3227(65)90025-3 (its on scihub)
# also apparently Fridland 1972 "The pattern of the soil cover" but that's
# not online ;_;
# basically this is looking for a single-polygon simplification that is a)
# as close to n vertices as possible and b) as close to equidistant sides as
# possible. It would appear to assume a relatively high-detail polygon as input,
# with no islands and holes ignored if present.
library(sf)
library(tidyverse)
nc = st_read(system.file("shape/nc.shp", package="sf"))
nc1 <- nc[1, ]
nc1 <- st_transform(nc1, 3857) # shhh don't need max precision, just meters
st_cast(nc1, 'POINT') %>% nrow() # 27 points natively
# add points to a certain level of detail (0.5 km here)
nc1d <- st_segmentize(nc1, 500)
plot(st_cast(nc1d[0], 'POINT'), pch = 20, col = 'red')
# filter to get a set of points x dist apart (20 * 500 here - 10 km)
# but this is counted along the perimeter so the points aren't really
# equidistant as required. More problematic as shape gets more convoluted...
nc1_lp <- st_cast(nc1d, 'POINT') %>%
# may as well start from a random vertex:
filter((row_number() + sample(seq(nrow(.)), 1)) %% 20 == 0)
plot(nc1[0], reset = FALSE, border = 'grey60')
plot(nc1_lp[0], pch = 20, col = 'red', add = TRUE)
# surprisingly shape preserving although obs not foolproof
# defo only for simple polygons! try nc[4, ] haha
# back to poly with
lag_ish_poly <- st_coordinates(nc1_lp) %>%
rbind(., .[1, ]) %>%
list(.) %>%
st_polygon()
plot(lag_ish_poly, col = NA, border = 'red', add = TRUE)
# to get the lag sums after Stoddard's description:
## "The polygon thus formed can be described by summing the distances between
## all the vertices lag-one, lag-two,..., until all unique sums are determined;
## and by squaring and then summing the distances between all vertices lag-one,
## lag-two..., until all unique squared sums are determined. Each shape will
## then have a one-to-one correspondence with one of these sets of sums"
# and
## "Distances lag-one (S1) are given by AC, BD, CE,... HB, i.e., one vertex is
## skipped. Distances lag-two (S2) are given by AD, BE, CF:... HC; i.e., two
## vertices are skipped. Distances lag-three (S3) are given by AE, BF, CG,...
## HD; i.e., three vertices are skipped"
# Poore generalises this to (n-2/2) lag steps, where n is polygon vertices. This
# conveniently(?) gave him ten lag measurements for each 22-sided shape.
lagsums <- function(pts = NULL) {
nr <- nrow(pts)
lags <- map_dbl(seq(2, (nr - 2)/2), function(l) {
# subset in a loop
start <- pts[1, ]
mids <- pts[seq(nr) %% l == 0, ]
end <- pts[l, ]
lagset <- rbind(start, mids)
lagset1 <- rbind(mids, end)
lag_d <- as.numeric(st_distance(lagset, lagset1, by_element = TRUE))
mean(lag_d) # gets around lack of perf. equal side dist
})
matrix(c(lags, lags^2), ncol = 2, byrow = FALSE)
}
# anyway apparently this encodes the shape to a given precision...
lagsums(nc1_lp)
df <- as.data.frame(lagsums(nc1_lp)) %>%
mutate(LAG = seq(n()) + 1)
# but they don't plot to a nice smooth curve like the examples in Poore (p.37ish)
ggplot(df, aes(x = LAG, y = V1)) +
geom_line() +
theme_minimal()
# what happens if one segmentises and then runs ms_simplify, not worrying about
# edge length e.g.
nc1_mss <-
rmapshaper::ms_simplify(nc1d,
# gets as close to n vertices as possible:
keep = 1 / st_cast(nc1d, 'POINT') %>% nrow() * 25) %>%
st_cast(., 'POINT')
plot(nc1[0], reset = FALSE, border = 'grey60')
plot(nc1_mss[0], pch = 20, col = 'red', add = TRUE)
# messier!
lagsums(nc1_mss)
df2 <- as.data.frame(lagsums(nc1_mss)) %>%
mutate(LAG = seq(n()) + 1)
ggplot(df2, aes(x = LAG, y = V1)) +
geom_line() +
theme_minimal()
# anyway tbh there are simpler measures of shape/form/complexity but this
# is still intriguing
@mdsumner
Copy link

mdsumner commented Apr 3, 2019

Here's what I was thinking!

  library(sf)
#> Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3

nc <- read_sf(system.file("shape/nc.shp", package="sf"))
layout(matrix(seq_len(nrow(nc)), n2mfrow(nrow(nc))))
par(mar = rep(0, 4))
for (sf_i in seq_len(nrow(nc))) {
 nc1 <- st_cast(nc[sf_i,, drop = FALSE ], "POLYGON")
 nc1 <- nc1[which.max(st_area(nc1)), ]
 

  pts <- st_coordinates(nc1)[, 1:2, drop = FALSE]
  dpts <- st_coordinates(st_segmentize(nc1, 500))[, 1:2, drop = FALSE]
  
  ## start somewhere
  i <- 1
  currentpoint <- firstpoint <- dpts[i, , drop = FALSE]
  chunkdistance <- 2e4
  dist_to_end <- Inf
  cnt <- 1
  nexti <- 1
  ii <- list(i)
  while(dist_to_end > chunkdistance || nexti > nrow(dpts)) {
    wch <- which(geodist::geodist(dpts, currentpoint) > chunkdistance)
    if (length(wch) < 1) {
      warning(sprintf("no distances greater than %0.2f", chunkdistance))
      break;
    }
    nexti <- wch[wch > nexti][1]
    currentpoint <- dpts[nexti, , drop = FALSE]
    ## ignore the first time through
    if (cnt > 1) dist_to_end <- geodist::geodist(currentpoint, firstpoint)[1]
    cnt <- cnt + 1
    ii[cnt] <- nexti
    # print(nexti)
    # print(firstpoint)
    # print(currentpoint)
    # print(dist_to_end)
    
  }
  ii[cnt+1] <- nrow(dpts)
  index <- unique(unlist(ii))
  plot(pts, asp = 1, axes = FALSE, ylab = "", xlab = "")
  lines(dpts[index, ], col = "darkgrey")
  points(dpts[index, ], pch = 19, cex = 0.3, col = "firebrick")
}
## deleted bunch o warnings ...

Created on 2019-04-04 by the reprex package (v0.2.1)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment