Skip to content

Instantly share code, notes, and snippets.

@asinghvi17
Last active May 9, 2024 13:52
Show Gist options
  • Save asinghvi17/6cd96854b315f1beeec68cda953d0663 to your computer and use it in GitHub Desktop.
Save asinghvi17/6cd96854b315f1beeec68cda953d0663 to your computer and use it in GitHub Desktop.
Hook into RCall.jl to convert SF geometries to GeoInterface wrappers
#=
This file provides integration for R's SF objects
which are essentially geo-dataframes,
into Julia DataFrames with GeoInterface wrapper
geometries.
This requires https://github.com/JuliaInterop/RCall.jl/pull/528,
so run `Pkg.add((; url = "https://github.com/asinghvi17/RCall.jl", rev = "patch-1"))` to get that.
TODO:
- Evaluate all of this in a separate R environment (see `RCall.newEnvironment` in https://github.com/JuliaInterop/RCall.jl/blob/15178034400d449fb7500663717a9165c77ba020/src/methods.jl#L564-L574)
- Test on all sorts of shapefiles/input
- Examples of using `tigris` for this.
=#
using RCall
import RCall: rcopy, rcopytype, sexp
import OrderedCollections
R"library(sf)"
# Import geographic libraries
import GeoFormatTypes as GFT
import GeoInterface as GI
# Set up data
nc = R"""
st_read(system.file("shape/nc.shp", package="sf"))
"""
nc |> RCall.getclass # 2 element vector
g1 = R"$nc$geometry[1]"
# tests
R"library(tigris)"
R"library(tidycensus)"
R"""census_api_key("$(ENV["CENSUS_API_KEY"])")"""
manhattan_roads = rcopy((R"""roads("NY", "New York")"""))
orange = R"""
get_acs(
state = "CA",
county = "Orange",
geography = "tract",
variables = "B19013_001",
geometry = TRUE,
year = 2020
)
""" |> rcopy
using DataFrames
nm_orange = dropmissing(orange)
f, a, p = poly(nm_orange.geometry; color = nm_orange.estimate, axis = (; title = "B19013_001 variable for Orange County, CA"))
Colorbar(f[1, 2], p; label = "Estimate")
f
# Now, you can copy any R geom directly...
R"""
p <- rbind(c(3.2,4), c(3,4.6), c(3.8,4.4), c(3.5,3.8), c(3.4,3.6), c(3.9,4.5))
(mp <- st_multipoint(p))
## MULTIPOINT ((3.2 4), (3 4.6), (3.8 4.4), (3.5 3.8), (3.4 3.6), (3.9 4.5))
s1 <- rbind(c(0,3),c(0,4),c(1,5),c(2,5))
(ls <- st_linestring(s1))
## LINESTRING (0 3, 0 4, 1 5, 2 5)
s2 <- rbind(c(0.2,3), c(0.2,4), c(1,4.8), c(2,4.8))
s3 <- rbind(c(0,4.4), c(0.6,5))
(mls <- st_multilinestring(list(s1,s2,s3)))
p1 <- rbind(c(0,0), c(1,0), c(3,2), c(2,4), c(1,4), c(0,0))
p2 <- rbind(c(1,1), c(1,2), c(2,2), c(1,1))
pol <-st_polygon(list(p1,p2))
p3 <- rbind(c(3,0), c(4,0), c(4,1), c(3,1), c(3,0))
p4 <- rbind(c(3.3,0.3), c(3.8,0.3), c(3.8,0.8), c(3.3,0.8), c(3.3,0.3))[5:1,]
p5 <- rbind(c(3,3), c(4,2), c(4,3), c(3,3))
(mpol <- st_multipolygon(list(list(p1,p2), list(p3,p4), list(p5))))
## MULTIPOLYGON (((0 0, 1 0, 3 2, 2 4, 1 4, 0 0), (1 1, 1 2, 2 2, 1 1)), ((3 0, 4 0, 4 1, 3 1, 3 0), (3.3 0.3, 3.3 0.8, 3.8 0.8, 3.8 0.3, 3.3 0.3)), ((3 3, 4 2, 4 3, 3 3)))
(gc <- st_geometrycollection(list(mp, mpol, ls)))
"""
mp = R"mp"
ls = R"ls"
mls = R"mls"
pol = R"pol"
mpol = R"mpol"
# The way RCall works is that it iterates over the available classes,
# see the code in https://github.com/JuliaInterop/RCall.jl/blob/15178034400d449fb7500663717a9165c77ba020/src/convert/default.jl#L8-L22,
# and checks to see if any of those return a method which it can use in Julia.
# So, we will override first the RCall checking function, for `sf` class.
#
# Below is an example of how to do this for a multipolygon:
RCall.rcopytype(::Type{RCall.RClass{:sfc_MULTIPOLYGON}}, s::Ptr{RCall.VecSxp}) = Vector{GI.MultiPolygon}
RCall.rcopytype(::Type{RCall.RClass{:sfc_POLYGON}}, s::Ptr{RCall.VecSxp}) = Vector{GI.Polygon}
RCall.rcopytype(::Type{RCall.RClass{:sfc_LINESTRING}}, s::Ptr{RCall.VecSxp}) = Vector{GI.LineString}
RCall.rcopytype(::Type{RCall.RClass{:sfc_MULTILINESTRING}}, s::Ptr{RCall.VecSxp}) = Vector{GI.MultiLineString}
RCall.rcopytype(::Type{RCall.RClass{:sfc_MULTIPOINT}}, s::Ptr{RCall.VecSxp}) = Vector{GI.MultiPoint}
RCall.rcopytype(::Type{RCall.RClass{:sfc_POINT}}, s::Ptr{RCall.VecSxp}) = Vector{GI.Point}
# We can distinguish between `sfc` and `sfg` types,
# by specifying Z and M in different places.
# For sfc, we leave it unspecified, or the alternative
# which is better is that we can specify that they should be
# copied to vectors.
_tuplepointtype(::GI.Point{hasZ, hasM}) where {hasZ, hasM} = GI.Point{hasZ, hasM, NTuple{2+hasZ+hasM, Float64}, Nothing}
_tuplepointtype(::GI.LineString{hasZ, hasM}, crs::CRSType = nothing) where {hasZ, hasM, CRSType} = GI.LineString{hasZ, hasM, Vector{GI.Point{hasZ, hasM, Tuple{Float64, Float64}, Nothing}}, Nothing, Nothing}
function RCall.rcopy(::Type{Vector{GI.MultiPolygon}}, mpol_sfc::Ptr{VecSxp})
# First, get the metadata and understand what's going on
crs = get_sf_crs(mpol_sfc)
# Now, get the `sfg` object out from the `sfc` object:
hasZ, hasM = _has_z_m_from_first_geometry_of_sfc(mpol_sfc)
LineStringType = GI.LineString{hasZ, hasM, Vector{GI.Point{hasZ, hasM, Tuple{Float64, Float64}, Nothing}}, Nothing, Nothing}
PolyType = GI.Polygon{hasZ, hasM, Vector{LineStringType}, Nothing, typeof(crs)}
multipolys = [ # The reason we need this is that GI wrapper geoms don't currently support empty geometries...
GI.MultiPolygon{hasZ, hasM, Vector{PolyType}, Nothing, typeof(crs)}(
[PolyType(LineStringType[
GI.LineString(_pointify_matrix_by_row(GI.Point{hasZ, hasM}, rcopy(Matrix{Float64}, ls)))
for ls in pol
],
nothing, crs
)
for pol in mpol
],
nothing, crs
) for mpol in mpol_sfc
]
return multipolys
end
function RCall.rcopy(::Type{Vector{GI.Polygon}}, pol_sfc::Ptr{VecSxp})
# First, get the metadata and understand what's going on
crs = get_sf_crs(pol_sfc)
# Now, get the `sfg` object out from the `sfc` object:
hasZ, hasM = _has_z_m_from_first_geometry_of_sfc(pol_sfc)
polys = [GI.Polygon([ GI.LineString(_pointify_matrix_by_row(GI.Point{hasZ, hasM}, rcopy(Matrix{Float64}, ls))) for ls in pol]; crs) for pol in pol_sfc]
return polys
end
function RCall.rcopy(::Type{Vector{GI.Polygon}}, mls_sfc::Ptr{VecSxp})
# First, get the metadata and understand what's going on
crs = get_sf_crs(mls_sfc)
# Now, get the `sfg` object out from the `sfc` object:
hasZ, hasM = _has_z_m_from_first_geometry_of_sfc(mls_sfc)
mls = [GI.MultiLineString([ GI.LineString(_pointify_matrix_by_row(GI.Point{hasZ, hasM}, rcopy(Matrix{Float64}, ls))) for ls in mls_sfc]; crs) for pol in mls_sfc]
return mls
end
function RCall.rcopy(::Type{Vector{GI.LineString}}, ls_sfc::Ptr{VecSxp})
# First, get the metadata and understand what's going on
crs = get_sf_crs(ls_sfc)
# Now, get the `sfg` object out from the `sfc` object:
hasZ, hasM = _has_z_m_from_first_geometry_of_sfc(ls_sfc)
linestrings = [ GI.LineString(_pointify_matrix_by_row(GI.Point{hasZ, hasM}, rcopy(Matrix{Float64}, ls)); crs) for ls in ls_sfc]
return linestrings
end
function RCall.rcopy(::Type{Vector{GI.MultiPoint}}, ls_sfc::Ptr{VecSxp})
# First, get the metadata and understand what's going on
crs = get_sf_crs(ls_sfc)
# Now, get the `sfg` object out from the `sfc` object:
hasZ, hasM = _has_z_m_from_first_geometry_of_sfc(ls_sfc)
linestrings = [ GI.MultiPoint(_pointify_matrix_by_row(GI.Point{hasZ, hasM}, rcopy(Matrix{Float64}, ls)); crs) for ls in ls_sfc]
return linestrings
end
function RCall.rcopy(::Type{Vector{GI.Point}}, point_sfc::Ptr{VecSxp})
# First, get the metadata and understand what's going on
crs = get_sf_crs(ls_sfc)
# Now, get the `sfg` object out from the `sfc` object:
hasZ, hasM = _has_z_m_from_first_geometry_of_sfc(ls_sfc)
points = [ _pointify_matrix_by_row(GI.Point{hasZ, hasM}, rcopy(Matrix{Float64}, ls)) for ls in ls_sfc]
return GI.Point.(points; crs) # re-wrap the points, but with CRS.
end
function _has_z_m_from_first_geometry_of_sfc(geom)
fg = first(geom) # the sf object can hold only one sfg geometry in this case!
ndims_string = rcopy(String, first(RCall.getclass(fg))) # this is where dimensions are always held
hasZ, hasM = if ndims_string == "XY"
false, false
elseif ndims_string == "XYZ"
true, false
elseif ndims_string == "XYZM"
true, true
elseif ndims_string == "XYM"
false, true
else
error("Unknown dimensions: $ndims_string")
end
return hasZ, hasM
end
# The constraints should evaluate statically, and the allocations
# should also be constrained to a single point here.
# Ideally, we would actually do this at the time of copying...but oh well.
# That can always be optimized later!
function _pointify_matrix_by_row(point_type::Type{GI.Point{hasZ, hasM}},mat::Matrix) where {hasZ, hasM}
if hasZ && hasM
return @views point_type.(mat[:, 1], mat[:, 2], mat[:, 3], mat[:, 4])
elseif hasZ
return @views point_type.(mat[:, 1], mat[:, 2], mat[:, 3])
elseif hasM
return @views point_type.(mat[:, 1], mat[:, 2], mat[:, 3])
else
return @views point_type.(mat[:, 1], mat[:, 2])
end
end
function get_sf_crs(o::RCall.OrderedDict)
ks = keys(o)
return if :wkt in ks
GFT.ESRIWellKnownText(GFT.CRS(), o[:wkt])
elseif :esri in ks
GFT.EPSG(o[:esri])
else
@warn "No good CRS found in the dict, setting it to `nothing`"
nothing
end
end
function get_sf_crs(s::Ptr{RCall.VecSxp})
crs_dict= RCall.rcopy(R"st_crs($(s))")
return get_sf_crs(crs_dict)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment