Skip to content

Instantly share code, notes, and snippets.

@rbdixon
Created August 7, 2015 20:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rbdixon/3c84841afb9926ec6ccc to your computer and use it in GitHub Desktop.
Save rbdixon/3c84841afb9926ec6ccc to your computer and use it in GitHub Desktop.
#
# FROM dplyr join.r
#
inner_geo_join <- function(x, y, by = NULL, within = 0, ...) {
UseMethod("inner_geo_join")
}
left_geo_join <- function(x, y, by = NULL, within = 0, ...) {
UseMethod("left_geo_join")
}
right_geo_join <- function(x, y, by = NULL, within = 0, ...) {
UseMethod("right_geo_join")
}
full_geo_join <- function(x, y, by = NULL, within = 0, ...) {
UseMethod("full_geo_join")
}
environment(inner_geo_join) = asNamespace("dplyr")
environment(left_geo_join) = asNamespace("dplyr")
environment(right_geo_join) = asNamespace("dplyr")
environment(full_geo_join) = asNamespace("dplyr")
#
# FROM dplyr tbl-sql.R
#
inner_geo_join.tbl_sql <- function(x, y, by = NULL,
auto_index = FALSE, within = 0, ...) {
by <- common_by(by, x, y)
sql <- sql_geo_join(x$src$con, x, y, type = "inner", by = by, within = within)
update(tbl(x$src, sql), group_by = groups(x))
}
left_geo_join.tbl_sql <- function(x, y, by = NULL,
auto_index = FALSE, within = 0, ...) {
by <- common_by(by, x, y)
sql <- sql_geo_join(x$src$con, x, y, type = "left", by = by, within = within)
update(tbl(x$src, sql), group_by = groups(x))
}
right_geo_join.tbl_sql <- function(x, y, by = NULL,
auto_index = FALSE, within = 0, ...) {
by <- common_by(by, x, y)
sql <- sql_geo_join(x$src$con, x, y, type = "right", by = by, within = within)
update(tbl(x$src, sql), group_by = groups(x))
}
full_geo_join.tbl_sql <- function(x, y, by = NULL,
auto_index = FALSE, within = 0, ...) {
by <- common_by(by, x, y)
sql <- sql_geo_join(x$src$con, x, y, type = "full", by = by, within = 0)
update(tbl(x$src, sql), group_by = groups(x))
}
environment(inner_geo_join.tbl_sql) = asNamespace("dplyr")
environment(left_geo_join.tbl_sql) = asNamespace("dplyr")
environment(right_geo_join.tbl_sql) = asNamespace("dplyr")
environment(full_geo_join.tbl_sql) = asNamespace("dplyr")
#
# FROM dplyr dbi-s3.r
#
sql_geo_join <- function(con, x, y, type = "inner", by = NULL, within = 0, ...) {
join <- switch(type,
left = sql("LEFT"),
inner = sql("INNER"),
right = sql("RIGHT"),
full = sql("FULL"),
stop("Unknown join type:", type, call. = FALSE)
)
by <- common_by(by, x, y)
using <- all(by$x == by$y)
# Ensure tables have unique names
x_names <- auto_names(x$select)
y_names <- auto_names(y$select)
uniques <- unique_names(x_names, y_names, by$x[by$x == by$y])
if (is.null(uniques)) {
sel_vars <- c(x_names, y_names)
} else {
x <- update(x, select = setNames(x$select, uniques$x))
y <- update(y, select = setNames(y$select, uniques$y))
by$x <- unname(uniques$x[by$x])
by$y <- unname(uniques$y[by$y])
sel_vars <- unique(c(uniques$x, uniques$y))
}
if (using) {
stop("by parameter is required")
} else {
on <- sql_vector(paste0("ST_DWithin(", sql_escape_ident(con, by$x), ", ", sql_escape_ident(con, by$y), ", ", within, ")" ),
collapse = " AND ", parens = TRUE)
cond <- build_sql("ON ", on, con = con)
}
from <- build_sql(
'SELECT * FROM ',
sql_subquery(con, x$query$sql), "\n\n",
join, " JOIN \n\n" ,
sql_subquery(con, y$query$sql), "\n\n",
cond, con = con
)
attr(from, "vars") <- lapply(sel_vars, as.name)
cat(from)
from
}
environment(sql_geo_join) = asNamespace("dplyr")
@rbdixon
Copy link
Author

rbdixon commented Aug 7, 2015

This is a preliminary attempt at monkey-patching dplyr to support ST_DWithin() geographic joins supported by the PostGIS extensions to Postgres. There's a decent chance this would work with other geospatial databases but I've not tried to work with anything other than PostGIS.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment