Sleepingwell/gist:5843755

## gistfile1.r
# A wrapper around Rcartogram::cartogram for shape files.
#
# @param polys:     Either a SpatialPolygonsDataFrame or the path to a shape file.
# @param variable:  Either a string giving the name of the variable to used in polys@data, or a vector
#                   of the same length as polys@polygons.
# @param nx:        The number of columns in the raster used to represent the region.
# @param ny:        The number of rows in the raster used to represent the region.
# @param buffer.by: The proportion width of the buffer to add to the region (see ?Rcartogram::addBoundary).
#
# @return           The new boundaries as A SpatialPolygonsDataFrame with the attribute 'cartogram.scaling.var'.
#                   If variable is not a string, then this is added to the slot 'data' of the result.
#
# @warning          This should really only be used for polygons which are in an equal area projection
#                   (see http://en.wikipedia.org/wiki/Category:Equal-area_projections) due to the areal
#                   scaling using the input polygon areas.
#
# @warning          Not sure that this does properly respect projections... should be investigated by the
#                   interested user (though I would be interested in hearing :-).
cartogram.polys <- function(polys, variable, nx=100, ny=100, buffer.by=0.5) {
    require(raster)
    require(maptools)
    require(Rcartogram)
    # if you have passed in a string for polys, assume it is a file and open it
    # otherwise, polys must be a SpatialPolygonsDataFrame
    if(typeof(polys) == "character") polys <- readShapePoly(polys)
    else stopifnot("SpatialPolygonsDataFrame" %in% class(polys))

    # get the bounds for scaling... I'm not sure how to get this (i.e. the cartogram) to work with pure lat/lons.
    bnds <- do.call('rbind', sapply(polys@polygons, function(poly)
        t(sapply(poly@Polygons, function(poly1) c(min(poly1@coords[,1]), max(poly1@coords[,1]), min(poly1@coords[,2]), max(poly1@coords[,2]))))
    ))
    bnds <- c(min(bnds[,1]), max(bnds[,2]), min(bnds[,3]), max(bnds[,4]))
    offsets <- c(bnds[1], bnds[3])
    scalers <- c(bnds[2]-bnds[1], bnds[4]-bnds[3])

    # Create a new PolygonDataFrame where all the nested polygons are brought to the top level.
    # We do this because it makes the call to rasterise below simple (... or do I mean 'possible'?)
    # we don't scale the coordinates here because we want to maintain the projection when we do
    # the rasterisation below.
    new.polys <- SpatialPolygons(do.call('c', lapply(polys@polygons, function(poly) {
        mapply(function(poly1, id) Polygons(list(poly1), paste(poly@ID, id, sep='.')), poly@Polygons, 1:length(poly@Polygons), SIMPLIFY=F)
    })))

    # get the proportional area of each 'sub-polygon'. The variable of interest is then pro-rated
    # accross the sub-polygons using these proportional areas. Note that I make a feeble attempt
    # at dealing with holes, in that the divisor only includes those polygons that are not holes,
    # but it would be better to determine which sub-polygon the hole is in, then scale that polygon
    # explicitly.
    pareas <- lapply(polys@polygons, function(poly) {
        areas <- sapply(poly@Polygons, function(p) p@area)
        areas / sum(areas[!sapply(poly@Polygons, function(p) p@hole)])
    })

    # if variable is a string, then assume it is a column of the data slot of polys,
    # otherwise, use it directly.
    if(mode(variable) == 'character') {
        variable.name <- variable
        variable <- polys@data[[variable]]
    } else {
        stopifnot(length(variable) == nrow(polys@data))
        variable.name <- 'cartogram.scaling.var'
        polys@data[[variable.name]] <- variable
    }
    variable <- do.call('c', mapply(function(val, areas) val*areas, variable, pareas))

    # create a raster of the values of interest.
    # WARNING: I haven't looked into whether this will do the jobby properly.
    r <- raster(nrow=ny, ncol=nx)
    proj4string(r) <- proj4string(polys)
    extent(r) <- extent(polys)
    rpm <- as.matrix(rasterize(new.polys, r, variable))

    # now we scale the polygon coordinates.
    new.polys <- SpatialPolygons(do.call('c', lapply(new.polys@polygons, function(poly) {
        mapply(function(poly1, id) {
            poly1@coords <- cbind(nx*(poly1@coords[,1]-offsets[1])/scalers[1], ny*(poly1@coords[,2]-offsets[2])/scalers[2])
            Polygons(list(poly1), paste(poly@ID, id, sep='.'))
        }, poly@Polygons, 1:length(poly@Polygons), SIMPLIFY=F)
    })))

    # replace the missing values and creating padding with the mean of the other values
    # (as per example in demo(synthetic)). Note that there is an un-documented third argument
    # to addBoundary (look at the source).
    rmp.mean <- mean(unlist(rpm), na.rm=T)
    rpm[is.na(rpm)] <- rmp.mean
    rpp <- addBoundary(rpm, buffer.by, rmp.mean)

    # do the calculations and work out the changed boundaries. This is just following the recipie
    # in demo(synthetic) (in Rcartogram).
    rpc <- cartogram(rpp)
    added <- as.integer((dim(rpp) - dim(rpm))/2)
    res.polys <- lapply(new.polys@polygons, function(p) {
        lapply(p@Polygons, function(x) {
            preds <- predict(rpc, x@coords[,1] + added[1], x@coords[,2] + added[2])
            # scale back to original space.
            Polygon(cbind((preds$x - added[1]) * scalers[1] + offsets[1], (preds$y - added[2]) * scalers[2] + offsets[2]))
        })
    })

    # convert back to a set of polygons with the same grouping as in polys.
    sp.polys <- split(res.polys, sapply(new.polys@polygons, function(x) strsplit(x@ID, '\\.')[[1]][1]))
    structure(
        SpatialPolygonsDataFrame(
            SpatialPolygons(mapply(function(p, nm) Polygons(do.call('c', p), nm), sp.polys, names(sp.polys)), proj4string=CRS(as.character(proj4string(polys)))),
            polys@data
        ),
        cartogram.scaling.var=variable.name
    )
}

#-----------------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------------
# testing
#-----------------------------------------------------------------------------------------------------
library(maptools)
shpf <- 'd:/temp/polys/anra_geog94.shp'
# use area, simply because it will be there for all SpatialPolygonsDataFrame
polys <- readShapePoly(shpf)
areas <- sapply(polys@polygons, function(p) sum(sapply(p@Polygons, function(p) p@area)))
pc <- cartogram.polys(shpf, areas)
plot(pc)
	# A wrapper around Rcartogram::cartogram for shape files.
	#
	# @param polys: Either a SpatialPolygonsDataFrame or the path to a shape file.
	# @param variable: Either a string giving the name of the variable to used in polys@data, or a vector
	# of the same length as polys@polygons.
	# @param nx: The number of columns in the raster used to represent the region.
	# @param ny: The number of rows in the raster used to represent the region.
	# @param buffer.by: The proportion width of the buffer to add to the region (see ?Rcartogram::addBoundary).
	#
	# @return The new boundaries as A SpatialPolygonsDataFrame with the attribute 'cartogram.scaling.var'.
	# If variable is not a string, then this is added to the slot 'data' of the result.
	#
	# @warning This should really only be used for polygons which are in an equal area projection
	# (see http://en.wikipedia.org/wiki/Category:Equal-area_projections) due to the areal
	# scaling using the input polygon areas.
	#
	# @warning Not sure that this does properly respect projections... should be investigated by the
	# interested user (though I would be interested in hearing :-).
	cartogram.polys <- function(polys, variable, nx=100, ny=100, buffer.by=0.5) {
	require(raster)
	require(maptools)
	require(Rcartogram)
	# if you have passed in a string for polys, assume it is a file and open it
	# otherwise, polys must be a SpatialPolygonsDataFrame
	if(typeof(polys) == "character") polys <- readShapePoly(polys)
	else stopifnot("SpatialPolygonsDataFrame" %in% class(polys))

	# get the bounds for scaling... I'm not sure how to get this (i.e. the cartogram) to work with pure lat/lons.
	bnds <- do.call('rbind', sapply(polys@polygons, function(poly)
	t(sapply(poly@Polygons, function(poly1) c(min(poly1@coords[,1]), max(poly1@coords[,1]), min(poly1@coords[,2]), max(poly1@coords[,2]))))
	))
	bnds <- c(min(bnds[,1]), max(bnds[,2]), min(bnds[,3]), max(bnds[,4]))
	offsets <- c(bnds[1], bnds[3])
	scalers <- c(bnds[2]-bnds[1], bnds[4]-bnds[3])

	# Create a new PolygonDataFrame where all the nested polygons are brought to the top level.
	# We do this because it makes the call to rasterise below simple (... or do I mean 'possible'?)
	# we don't scale the coordinates here because we want to maintain the projection when we do
	# the rasterisation below.
	new.polys <- SpatialPolygons(do.call('c', lapply(polys@polygons, function(poly) {
	mapply(function(poly1, id) Polygons(list(poly1), paste(poly@ID, id, sep='.')), poly@Polygons, 1:length(poly@Polygons), SIMPLIFY=F)
	})))

	# get the proportional area of each 'sub-polygon'. The variable of interest is then pro-rated
	# accross the sub-polygons using these proportional areas. Note that I make a feeble attempt
	# at dealing with holes, in that the divisor only includes those polygons that are not holes,
	# but it would be better to determine which sub-polygon the hole is in, then scale that polygon
	# explicitly.
	pareas <- lapply(polys@polygons, function(poly) {
	areas <- sapply(poly@Polygons, function(p) p@area)
	areas / sum(areas[!sapply(poly@Polygons, function(p) p@hole)])
	})

	# if variable is a string, then assume it is a column of the data slot of polys,
	# otherwise, use it directly.
	if(mode(variable) == 'character') {
	variable.name <- variable
	variable <- polys@data[[variable]]
	} else {
	stopifnot(length(variable) == nrow(polys@data))
	variable.name <- 'cartogram.scaling.var'
	polys@data[[variable.name]] <- variable
	}
	variable <- do.call('c', mapply(function(val, areas) val*areas, variable, pareas))

	# create a raster of the values of interest.
	# WARNING: I haven't looked into whether this will do the jobby properly.
	r <- raster(nrow=ny, ncol=nx)
	proj4string(r) <- proj4string(polys)
	extent(r) <- extent(polys)
	rpm <- as.matrix(rasterize(new.polys, r, variable))

	# now we scale the polygon coordinates.
	new.polys <- SpatialPolygons(do.call('c', lapply(new.polys@polygons, function(poly) {
	mapply(function(poly1, id) {
	poly1@coords <- cbind(nx(poly1@coords[,1]-offsets[1])/scalers[1], ny(poly1@coords[,2]-offsets[2])/scalers[2])
	Polygons(list(poly1), paste(poly@ID, id, sep='.'))
	}, poly@Polygons, 1:length(poly@Polygons), SIMPLIFY=F)
	})))

	# replace the missing values and creating padding with the mean of the other values
	# (as per example in demo(synthetic)). Note that there is an un-documented third argument
	# to addBoundary (look at the source).
	rmp.mean <- mean(unlist(rpm), na.rm=T)
	rpm[is.na(rpm)] <- rmp.mean
	rpp <- addBoundary(rpm, buffer.by, rmp.mean)

	# do the calculations and work out the changed boundaries. This is just following the recipie
	# in demo(synthetic) (in Rcartogram).
	rpc <- cartogram(rpp)
	added <- as.integer((dim(rpp) - dim(rpm))/2)
	res.polys <- lapply(new.polys@polygons, function(p) {
	lapply(p@Polygons, function(x) {
	preds <- predict(rpc, x@coords[,1] + added[1], x@coords[,2] + added[2])
	# scale back to original space.
	Polygon(cbind((preds$x - added[1]) * scalers[1] + offsets[1], (preds$y - added[2]) * scalers[2] + offsets[2]))
	})
	})

	# convert back to a set of polygons with the same grouping as in polys.
	sp.polys <- split(res.polys, sapply(new.polys@polygons, function(x) strsplit(x@ID, '\\.')[[1]][1]))
	structure(
	SpatialPolygonsDataFrame(
	SpatialPolygons(mapply(function(p, nm) Polygons(do.call('c', p), nm), sp.polys, names(sp.polys)), proj4string=CRS(as.character(proj4string(polys)))),
	polys@data
	),
	cartogram.scaling.var=variable.name
	)
	}

	#-----------------------------------------------------------------------------------------------------
	#-----------------------------------------------------------------------------------------------------
	# testing
	#-----------------------------------------------------------------------------------------------------
	library(maptools)
	shpf <- 'd:/temp/polys/anra_geog94.shp'
	# use area, simply because it will be there for all SpatialPolygonsDataFrame
	polys <- readShapePoly(shpf)
	areas <- sapply(polys@polygons, function(p) sum(sapply(p@Polygons, function(p) p@area)))
	pc <- cartogram.polys(shpf, areas)
	plot(pc)