Skip to content

Instantly share code, notes, and snippets.

@Sleepingwell
Last active December 18, 2015 20:59
Show Gist options
  • Save Sleepingwell/5843755 to your computer and use it in GitHub Desktop.
Save Sleepingwell/5843755 to your computer and use it in GitHub Desktop.
A wrapper around Rcartogram::cartogram for shape files.
# A wrapper around Rcartogram::cartogram for shape files.
#
# @param polys: Either a SpatialPolygonsDataFrame or the path to a shape file.
# @param variable: Either a string giving the name of the variable to used in polys@data, or a vector
# of the same length as polys@polygons.
# @param nx: The number of columns in the raster used to represent the region.
# @param ny: The number of rows in the raster used to represent the region.
# @param buffer.by: The proportion width of the buffer to add to the region (see ?Rcartogram::addBoundary).
#
# @return The new boundaries as A SpatialPolygonsDataFrame with the attribute 'cartogram.scaling.var'.
# If variable is not a string, then this is added to the slot 'data' of the result.
#
# @warning This should really only be used for polygons which are in an equal area projection
# (see http://en.wikipedia.org/wiki/Category:Equal-area_projections) due to the areal
# scaling using the input polygon areas.
#
# @warning Not sure that this does properly respect projections... should be investigated by the
# interested user (though I would be interested in hearing :-).
cartogram.polys <- function(polys, variable, nx=100, ny=100, buffer.by=0.5) {
require(raster)
require(maptools)
require(Rcartogram)
# if you have passed in a string for polys, assume it is a file and open it
# otherwise, polys must be a SpatialPolygonsDataFrame
if(typeof(polys) == "character") polys <- readShapePoly(polys)
else stopifnot("SpatialPolygonsDataFrame" %in% class(polys))
# get the bounds for scaling... I'm not sure how to get this (i.e. the cartogram) to work with pure lat/lons.
bnds <- do.call('rbind', sapply(polys@polygons, function(poly)
t(sapply(poly@Polygons, function(poly1) c(min(poly1@coords[,1]), max(poly1@coords[,1]), min(poly1@coords[,2]), max(poly1@coords[,2]))))
))
bnds <- c(min(bnds[,1]), max(bnds[,2]), min(bnds[,3]), max(bnds[,4]))
offsets <- c(bnds[1], bnds[3])
scalers <- c(bnds[2]-bnds[1], bnds[4]-bnds[3])
# Create a new PolygonDataFrame where all the nested polygons are brought to the top level.
# We do this because it makes the call to rasterise below simple (... or do I mean 'possible'?)
# we don't scale the coordinates here because we want to maintain the projection when we do
# the rasterisation below.
new.polys <- SpatialPolygons(do.call('c', lapply(polys@polygons, function(poly) {
mapply(function(poly1, id) Polygons(list(poly1), paste(poly@ID, id, sep='.')), poly@Polygons, 1:length(poly@Polygons), SIMPLIFY=F)
})))
# get the proportional area of each 'sub-polygon'. The variable of interest is then pro-rated
# accross the sub-polygons using these proportional areas. Note that I make a feeble attempt
# at dealing with holes, in that the divisor only includes those polygons that are not holes,
# but it would be better to determine which sub-polygon the hole is in, then scale that polygon
# explicitly.
pareas <- lapply(polys@polygons, function(poly) {
areas <- sapply(poly@Polygons, function(p) p@area)
areas / sum(areas[!sapply(poly@Polygons, function(p) p@hole)])
})
# if variable is a string, then assume it is a column of the data slot of polys,
# otherwise, use it directly.
if(mode(variable) == 'character') {
variable.name <- variable
variable <- polys@data[[variable]]
} else {
stopifnot(length(variable) == nrow(polys@data))
variable.name <- 'cartogram.scaling.var'
polys@data[[variable.name]] <- variable
}
variable <- do.call('c', mapply(function(val, areas) val*areas, variable, pareas))
# create a raster of the values of interest.
# WARNING: I haven't looked into whether this will do the jobby properly.
r <- raster(nrow=ny, ncol=nx)
proj4string(r) <- proj4string(polys)
extent(r) <- extent(polys)
rpm <- as.matrix(rasterize(new.polys, r, variable))
# now we scale the polygon coordinates.
new.polys <- SpatialPolygons(do.call('c', lapply(new.polys@polygons, function(poly) {
mapply(function(poly1, id) {
poly1@coords <- cbind(nx*(poly1@coords[,1]-offsets[1])/scalers[1], ny*(poly1@coords[,2]-offsets[2])/scalers[2])
Polygons(list(poly1), paste(poly@ID, id, sep='.'))
}, poly@Polygons, 1:length(poly@Polygons), SIMPLIFY=F)
})))
# replace the missing values and creating padding with the mean of the other values
# (as per example in demo(synthetic)). Note that there is an un-documented third argument
# to addBoundary (look at the source).
rmp.mean <- mean(unlist(rpm), na.rm=T)
rpm[is.na(rpm)] <- rmp.mean
rpp <- addBoundary(rpm, buffer.by, rmp.mean)
# do the calculations and work out the changed boundaries. This is just following the recipie
# in demo(synthetic) (in Rcartogram).
rpc <- cartogram(rpp)
added <- as.integer((dim(rpp) - dim(rpm))/2)
res.polys <- lapply(new.polys@polygons, function(p) {
lapply(p@Polygons, function(x) {
preds <- predict(rpc, x@coords[,1] + added[1], x@coords[,2] + added[2])
# scale back to original space.
Polygon(cbind((preds$x - added[1]) * scalers[1] + offsets[1], (preds$y - added[2]) * scalers[2] + offsets[2]))
})
})
# convert back to a set of polygons with the same grouping as in polys.
sp.polys <- split(res.polys, sapply(new.polys@polygons, function(x) strsplit(x@ID, '\\.')[[1]][1]))
structure(
SpatialPolygonsDataFrame(
SpatialPolygons(mapply(function(p, nm) Polygons(do.call('c', p), nm), sp.polys, names(sp.polys)), proj4string=CRS(as.character(proj4string(polys)))),
polys@data
),
cartogram.scaling.var=variable.name
)
}
#-----------------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------------
# testing
#-----------------------------------------------------------------------------------------------------
library(maptools)
shpf <- 'd:/temp/polys/anra_geog94.shp'
# use area, simply because it will be there for all SpatialPolygonsDataFrame
polys <- readShapePoly(shpf)
areas <- sapply(polys@polygons, function(p) sum(sapply(p@Polygons, function(p) p@area)))
pc <- cartogram.polys(shpf, areas)
plot(pc)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment