La_rs bughunt (@ajdamico, created February 22, 2016)
# # # # # # # # # # # # # # # # #
# # set the working directory # #
# # # # # # # # # # # # # # # # #
# setwd( "C:/My Directory/SWMAP/" )
# # # # # # # # # # # # # # # #
# # example survey data set # #
# # # # # # # # # # # # # # # #
# american community survey
# # # # # # # # # # # # # # # # # # # # #
# # different from other maps because # #
# # # # # # # # # # # # # # # # # # # # #
# displays a non-ordinal categorical variable
# crosses the international date line
# # # # # # # # # # # # # # # # # #
# # smallest level of geography # #
# # # # # # # # # # # # # # # # # #
# state, public use microdata areas
# # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # asdfree.com blog post for this survey microdata # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # #
# http://www.asdfree.com/search/label/american%20community%20survey%20%28acs%29
# # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # r code repository for setup and analysis examples # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# https://github.com/ajdamico/asdfree/tree/master/American%20Community%20Survey
# # # # # # # # # # # # #
# # value of interest # #
# # # # # # # # # # # # #
# disproportionate shares of veterans of foreign wars (categorical)
# # # # # # #
# # flaws # #
# # # # # # #
# map presents shares that are *disproportionately* higher than the statewide average.
# in absolute numbers, gulf war veterans outnumber other veteran categories in four of the five pumas.
# # # # # # # # # # # # # # # # # # # # #
# # step 1: load the survey microdata # #
# # # # # # # # # # # # # # # # # # # # #
# remove the # in order to run this install.packages line only once
# install.packages( c( "MonetDB.R" , "MonetDBLite" ) , repos=c("http://dev.monetdb.org/Assets/R/", "http://cran.rstudio.com/"))
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
library(downloader)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# download the 2013 american community survey microdata onto the local disk
# path.to.7z <- "7za" # # only macintosh and *nix users need this line
single.year.datasets.to.download <- 2013
three.year.datasets.to.download <- NULL
five.year.datasets.to.download <- NULL
source_url( "https://raw.githubusercontent.com/ajdamico/asdfree/master/American%20Community%20Survey/download%20all%20microdata.R" , prompt = FALSE , echo = TRUE )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # end of step 1 # #
# # # # # # # # # # #
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # step 2: conduct your analysis of interest at the smallest geography allowed # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
library(survey)
library(MonetDB.R)
library(MonetDBLite)
library(scales)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# connect to the database
dbfolder <- paste0( getwd() , "/MonetDB" )
db <- dbConnect( MonetDBLite() , dbfolder )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # # # run your analysis commands # # # #
# subset the design to only alaska before actually constructing the design
acs.alaska <- dbGetQuery( db , 'select * from acs2013_1yr_m where st = 2' )
# note: this is not allowed for taylor-series linearized designs
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# disconnect from the current monet database
dbDisconnect( db )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# construct a svrepdesign object
alaska.design <-
svrepdesign(
weight = ~pwgtp ,
repweights = 'pwgtp[1-9]' ,
scale = 4 / 80 ,
rscales = rep( 1 , 80 ) ,
mse = TRUE ,
data = acs.alaska
)
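# the scale= and rscales= arguments above encode the acs's successive
# difference replication (sdr) variance formula:
# Var( theta ) = ( 4 / 80 ) * the sum of the 80 squared deviations of the
# replicate estimates from the full-sample estimate (that's what mse = TRUE does).
# here's a quick hand check of that formula against the survey package,
# a sketch assuming the pwgtp1 - pwgtp80 replicate weight columns:
v <- as.numeric( acs.alaska$vps > 0 )
full <- weighted.mean( v , acs.alaska$pwgtp , na.rm = TRUE )
reps <- sapply( 1:80 , function( i ) weighted.mean( v , acs.alaska[ , paste0( 'pwgtp' , i ) ] , na.rm = TRUE ) )
# this standard error..
sqrt( ( 4 / 80 ) * sum( ( reps - full ) ^ 2 ) )
# ..should essentially match this one
SE( svymean( ~ I( as.numeric( vps > 0 ) ) , alaska.design , na.rm = TRUE ) )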
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# create a denominator variable indicating any period of service
alaska.design <- update( alaska.design , vet = as.numeric( vps > 0 ) )
# create a categorical variable indicating era of service
alaska.design <-
update(
alaska.design ,
gulf = as.numeric( vps %in% 1:5 ) ,
vietnam = as.numeric( vps %in% 6:8 ) ,
other = as.numeric( vps %in% 9:15 )
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# statewide era of service shares
sw <- svyratio( ~ gulf + vietnam + other , ~ vet , alaska.design , na.rm = TRUE )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# puma-specific era of service shares
ps <- svyby( ~ gulf + vietnam + other , denominator = ~ vet , by = ~ puma , alaska.design , svyratio , na.rm = TRUE )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# find the disproportionate shares
ds <- ps[ , 2:4 ] - matrix( coef( sw ) , 5 , 3 , byrow = T )
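# the same calculation without hardcoding the 5 x 3 dimensions (a sketch):
# ds <- sweep( ps[ , 2:4 ] , 2 , coef( sw ) , "-" )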
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# so look at this table.
ds
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# pumas 101 and 300 have veterans that disproportionately served during the gulf wars (up to the present)
# pumas 102 and 200 have veterans that disproportionately served during the vietnam war
# puma 400 has veterans that disproportionately served during another era
# hold on to these disproportionate shares and the standard errors of the original ratios.
alaska.pumas <- cbind( ps[ 1 ] , ds , ps[ , 5:7 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# note that the standard error of the ratio statistic
# and the standard error of the difference between
# the statewide and puma-level statistics
# are probably not the same. well, i'm sure of it.
# if you're an academic statistician, you might be mad at me
# for making this half-assed calculation right here.
# github makes it easy to patch and edit and update other people's code.
# go for it ;)
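# for the record, here's roughly what a more defensible calculation of one
# cell might look like (a sketch, not part of the original workflow):
# estimate (puma-level share minus statewide share) inside a single function,
# so the replication machinery captures the covariance between the two.
# `101` and `gulf` are just one illustrative puma/category pair.
withReplicates(
	alaska.design ,
	function( w , data ){
		sw.share <- sum( w * data$gulf , na.rm = TRUE ) / sum( w * data$vet , na.rm = TRUE )
		ip <- data$puma %in% 101
		puma.share <- sum( w[ ip ] * data$gulf[ ip ] , na.rm = TRUE ) / sum( w[ ip ] * data$vet[ ip ] , na.rm = TRUE )
		# the returned statistic is the difference itself,
		# so the reported SE is the standard error of the difference
		puma.share - sw.share
	}
)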
# remove those slashvets from the column names
names( alaska.pumas ) <- gsub( "/vet" , "" , names( alaska.pumas ) )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# these are the small area statistics to be mapped
print( alaska.pumas )
# the standard errors are a measure of precision;
# their inverses will serve as the mapping weights
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# make this object easier to type
sas <- alaska.pumas
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # end of step 2 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # step 3: download and import necessary geographic crosswalks # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
library(downloader)
library(maptools)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# load the download_cached and related functions
# to prevent re-downloading of files once they've been downloaded.
source_url(
"https://raw.github.com/ajdamico/asdfree/master/Download%20Cache/download%20cache.R" ,
prompt = FALSE ,
echo = FALSE
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# create a temporary file containing the census bureau's
# 2010 census tract to 2010 puma crosswalk
# then download the file.
ctpxw.tf <- tempfile()
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
download_cached(
"http://www2.census.gov/geo/docs/maps-data/data/rel/2010_Census_Tract_to_2010_PUMA.txt" ,
ctpxw.tf ,
mode = 'wb'
)
# note: to re-download a file from scratch, add the parameter usecache = FALSE
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# import this csv file into an R data.frame object
ctpxw <- read.csv( ctpxw.tf )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# match the column names of sf1 and of the `sas` output
names( ctpxw ) <- c( 'state' , 'county' , 'tract' , 'puma' )
# immediately limit this to alaskan census tracts
ak.ctpxw <- subset( ctpxw , state == 2 )
# clear up RAM
rm( ctpxw ) ; gc()
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# create a temporary file containing the census bureau's
# 2010 census summary file #1 for alaska
# then download the file.
sf1ak.tf <- tempfile()
download_cached(
"ftp://ftp2.census.gov/census_2010/04-Summary_File_1/Alaska/ak2010.sf1.zip" ,
sf1ak.tf ,
mode = 'wb'
)
# note: to re-download a file from scratch, add the parameter usecache = FALSE
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# unzip the summary file #1 files
sf1ak.uz <- unzip( sf1ak.tf , exdir = tempdir() )
# file layout from http://www.census.gov/prod/cen2010/doc/sf1.pdf#page=18
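# (negative widths in read.fwf mean "skip that many characters")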
sf1ak <- read.fwf( sf1ak.uz[ grep( "akgeo2010" , sf1ak.uz ) ] , c( -8 , 3 , -16 , 2 , 3 , -22 , 6 , 1 , 4 , -253 , 9 , -9 , 11 , 12 ) )
# add column names matching the census bureau, so it's easy to read
names( sf1ak ) <- c( "sumlev" , "state" , "county" , "tract" , "blkgrp" , "block" , "pop100" , "intptlat" , "intptlon" )
# summary level 101 has census tracts and census blocks
sf1ak.101 <- subset( sf1ak , sumlev == "101" )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# merge these files together
sf1ak.101 <- merge( sf1ak.101 , ak.ctpxw )
# the number of records and population sums serve
# as a check to confirm that this merge worked:
# one record per census block in alaska. see? same number.
nrow( sf1ak.101 )
# https://www.census.gov/geo/maps-data/data/tallies/census_block_tally.html
# and guess what? the total alaska population matches as well.
sum( sf1ak.101$pop100 )
# http://quickfacts.census.gov/qfd/states/02000.html
# clear up RAM
rm( sf1ak ) ; gc()
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# so now we have a data.frame object with
# one record per census block,
# and also with the geography (puma)
# that matches the american community survey
head( sf1ak.101 )
# and guess what?
# we've now got the census 2010 weighted populations (field pop100)
# and also each census block's centroid latitude & longitude (fields intptlat + intptlon)
# # end of step 3 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # step 4: merge the results of your survey analysis with the small-area geography # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# confirm that we've created all possible geographies correctly.
# the number of records in our small area statistics..
sas.row <- nrow( sas )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# ..should equal the number of unique-match-merged records..
mrow <- nrow( merge( unique( sf1ak.101[ "puma" ] ) , sas ) )
# ..and it does/they do.
stopifnot( sas.row == mrow )
# now the census block-level alaska census data *could* merge if you wanted it to.
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# but you don't. yet.
# the standard errors (the `se.` fields) are measures of precision.
print( sas )
# the smaller the standard error, the more confident you should be
# that the estimate at a particular geography is correct.
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# so invert them. you heard me. invert them.
sas$invse.gulf <- 1 / sas$se.gulf
sas$invse.vietnam <- 1 / sas$se.vietnam
sas$invse.other <- 1 / sas$se.other
# a smaller standard error indicates more precision.
# for our purposes, precision can be considered weight! #
# now we've got the weight that we should give each of our estimates #
# distribute that weight across all census blocks #
# aggregate the 2010 census block populations to the geographies that you have.
popsum <- aggregate( sf1ak.101$pop100 , by = ( sf1ak.101[ "puma" ] ) , sum )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# make the column name meaningful
names( popsum )[ names( popsum ) == 'x' ] <- 'popsum'
# merge the popsum onto the sasfile
sas <- merge( sas , popsum )
# now. merge
# the disproportionate veteran era in each puma (the variable of interest)
# the inverted standard errors (the total weight of the broad geography)
# the population sum (the total population of all census blocks that are part of that geography)
x <- merge( sf1ak.101 , sas )
# confirm no record loss
stopifnot( nrow( x ) == nrow( sf1ak.101 ) )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# (this is the fun part)
# calculate the weight at each census block
x$weight.gulf <- x$invse.gulf * ( x$pop100 / x$popsum )
x$weight.vietnam <- x$invse.vietnam * ( x$pop100 / x$popsum )
x$weight.other <- x$invse.other * ( x$pop100 / x$popsum )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# note that the weight of all census blocks put together
# sums to the `invse` on the original analysis file
# (compare with all.equal rather than == to allow for floating point error)
stopifnot( isTRUE( all.equal( sum( x$weight.gulf ) , sum( sas$invse.gulf ) ) ) )
stopifnot( isTRUE( all.equal( sum( x$weight.vietnam ) , sum( sas$invse.vietnam ) ) ) )
stopifnot( isTRUE( all.equal( sum( x$weight.other ) , sum( sas$invse.other ) ) ) )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# remove records with zero population across all three measures
x <- subset( x , weight.gulf > 0 | weight.vietnam > 0 | weight.other > 0 )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# scale all weights so that they average to one
x$weight.gulf <- x$weight.gulf / mean( x$weight.gulf )
x$weight.vietnam <- x$weight.vietnam / mean( x$weight.vietnam )
x$weight.other <- x$weight.other / mean( x$weight.other )
# you're done preparing your data.
# keep only the columns you need.
x <- x[ , c( 'gulf', 'vietnam' , 'other' , 'weight.gulf' , 'weight.vietnam' , 'weight.other' , 'intptlat' , 'intptlon' ) ]
# pop quiz: which states are the furthest north, east, south, west?
# if you guessed alaska, maine, hawaii, alaska, you are wrong!
# the answer is alaska, alaska, hawaii, alaska.
# a few of the aleutians cross the international date line.
# do you want to keep the edges of the aleutian islands in your map?
# of course you do! here's an ultra-simple recode to keep them gridded together.
x <- transform( x , intptlon = ifelse( intptlon > 0 , intptlon - 360 , intptlon ) )
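# for example, a centroid near attu island at longitude ~173 east becomes ~ -187,
# so it plots just west of -180 instead of wrapping to the far right of the grid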
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # end of step 4 # #
# # # # # # # # # # #
# # # # # # # # # # # #
# # step 5: outline # #
# # # # # # # # # # # #
library(maptools)
library(raster)
library(rgeos)
library(rgdal)
library(ggplot2)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# make a character vector containing the shapefiles to download
shftd <-
c(
# download the clipped alaska public use microdata area map, described
# https://www.census.gov/geo/maps-data/maps/2010puma/st02_ak.html
'http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_02_puma10_500k.zip' ,
# download the clipped nationwide state outlines
'http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_us_state_500k.zip' ,
# download the roads in alaska
'http://www2.census.gov/geo/tiger/TIGER2013/PRISECROADS/tl_2013_02_prisecroads.zip'
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# initiate a function to download and import all census bureau shapefiles
daiacbsf <-
function( fn , myproj = "+init=epsg:2163" ){
tf <- tempfile()
# # note: to re-download a file from scratch, add the parameter usecache = FALSE # #
download_cached( fn , tf , mode = 'wb' )
# unzip the downloaded file to a temporary directory
shp.uz <- unzip( tf , exdir = tempdir() )
# figure out which filename ends with "shp"
sfname <- grep( 'shp$' , shp.uz , value = TRUE )
# read in the shapefile, using the correct layer
sf <- readOGR( sfname , layer = gsub( "\\.shp" , "" , basename( sfname ) ) )
# project this shapefile immediately
# this projection (and a few others) keeps
# the aleutian islands that cross the
# international date line easy to work with.
spTransform( sf , CRS( myproj ) )
}
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# run all downloads at once, store the result in a list.
asf <- lapply( shftd , daiacbsf )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# pull out the clipped state borders of alaska only
alaska.borders <- subset( asf[[2]] , STATEFP == '02' )
# plot as-is. see how the aleutians screw up the map?
plot( alaska.borders )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# add puma boundaries
plot( asf[[1]] , add = TRUE )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# refresh the map with state borders only
plot( alaska.borders )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# add roads
plot( asf[[3]] , add = TRUE , col = 'red' )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# draw a rectangle 15% bigger than the original state
ak.shp.blank <- as( 1.3 * extent( alaska.borders ) , "SpatialPolygons" )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# calculate the difference between the rectangle and the actual shape
ak.shp.diff <- gDifference( ak.shp.blank , alaska.borders )
# this will be used to cover up points outside of alaska's state borders
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# this box will later blank out the surrounding area
plot( ak.shp.diff )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # end of step 5 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # #
# # step 6: tie knots and krige # #
# # # # # # # # # # # # # # # # # #
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
library(sqldf)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # warning warning # # # # warning warning # #
# alaska has a vast geography and highly skewed population centers
# kriging functions might not converge. that's why there are other options ;)
# # warning warning # # # # warning warning # #
# how many knots should you make? #
# knots are the computationally-intensive part of this process,
# choose as many as your computer and your patience can handle.
# you should aim for between 100 and 999 knots,
# but numbers closer to 1,000 will overload smaller computers
# you could let the `fields` package attempt to guess knots for you,
# xknots <- cover.design( cbind( x$intptlon , x$intptlat ) , 100 )$design
# but with census microdata, you've already got easy access to a relevant geographic grouping
# the sqldf() function doesn't like `.` in data.frame object names
sf1s <- sf1ak.101
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# exactamundo same transform operation as you saw previously on `x`
sf1s <- transform( sf1s , intptlon = ifelse( intptlon > 0 , intptlon - 360 , intptlon ) )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# within each county x census tract
# calculate the population-weighted mean of the coordinates
ct.knots <-
sqldf(
"select
county , tract ,
sum( pop100 ) as pop100 ,
sum( pop100 * intptlon ) / sum( pop100 ) as intptlon ,
sum( pop100 * intptlat ) / sum( pop100 ) as intptlat
from sf1s
group by
county , tract"
)
# note: averaging longitudes breaks for coordinates that cross the
# international date line. in the united states, only alaska's aleutian islands
# do this, and we're mapping alaska, aren't we? good thing we fixed it above, huh?
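# if you'd rather not depend on sqldf, the same population-weighted centroids
# can be cross-checked in base R (a sketch; `ct.knots.check` is just an
# illustrative name). weighted.mean() returns NaN for zero-population tracts.
ct.knots.check <-
	do.call(
		rbind ,
		lapply(
			split( sf1s , interaction( sf1s$county , sf1s$tract , drop = TRUE ) ) ,
			function( d )
				data.frame(
					county = d$county[ 1 ] ,
					tract = d$tract[ 1 ] ,
					pop100 = sum( d$pop100 ) ,
					intptlon = weighted.mean( d$intptlon , d$pop100 ) ,
					intptlat = weighted.mean( d$intptlat , d$pop100 )
				)
		)
	)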
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# interpolation option one #
library(fields)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
krig.fit.gulf <-
Krig(
cbind( x$intptlon , x$intptlat ) ,
Y = x$gulf ,
weights = x$weight.gulf ,
knots = cbind( ct.knots$intptlon , ct.knots$intptlat )
# if you prefer to use cover.design, all you'd need is this knots= line instead:
# knots = xknots
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
krig.fit.vietnam <-
Krig(
cbind( x$intptlon , x$intptlat ) ,
x$vietnam ,
weights = x$weight.vietnam ,
knots = cbind( ct.knots$intptlon , ct.knots$intptlat )
# if you prefer to use cover.design, all you'd need is this knots= line instead:
# knots = xknots
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
krig.fit.other <-
Krig(
cbind( x$intptlon , x$intptlat ) ,
x$other ,
weights = x$weight.other ,
knots = cbind( ct.knots$intptlon , ct.knots$intptlat )
# if you prefer to use cover.design, all you'd need is this knots= line instead:
# knots = xknots
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# that is: what is the (weighted) relationship between
# your variable of interest (veteran service eras) and
# the x/y points on a grid?
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# check this out!
surface( krig.fit.gulf )
surface( krig.fit.vietnam )
surface( krig.fit.other )
# you're almost there!
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# interpolation option two #
library(mgcv)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
gam.gulf <-
gam(
gulf ~ s( intptlon , intptlat ) ,
weights = weight.gulf ,
data = x
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
gam.vietnam <-
gam(
vietnam ~ s( intptlon , intptlat ) ,
weights = weight.vietnam ,
data = x
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
gam.other <-
gam(
other ~ s( intptlon , intptlat ) ,
weights = weight.other ,
data = x
)
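# note: s( intptlon , intptlat ) fits an isotropic thin plate regression
# spline by default. if the surface looks oversmoothed, a larger basis
# dimension (for example s( intptlon , intptlat , k = 100 )) buys more
# wiggliness at the cost of computation. a quick diagnostic:
summary( gam.gulf )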
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # end of step 6 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # #
# # step 7: make a grid and predict # #
# # # # # # # # # # # # # # # # # # # #
# use as fine a grid as your computer can handle
grid.length <- 750
# # note: smaller grids will render faster
# # (so they're better if you're just playing around)
# # but larger grids will prevent your final plot from
# # being too pixelated, even when zooming in
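# (a 750 x 750 grid works out to grid.length ^ 2 = 562,500 prediction points per statistic)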
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
x.range <- c( min( x$intptlon ) , max( x$intptlon ) )
y.range <- c( min( x$intptlat ) , max( x$intptlat ) )
# add twenty percent on each side
x.diff <- abs( x.range[ 2 ] - x.range[ 1 ] ) * 0.2
y.diff <- abs( y.range[ 2 ] - y.range[ 1 ] ) * 0.2
x.range[ 1 ] <- x.range[ 1 ] - x.diff
x.range[ 2 ] <- x.range[ 2 ] + x.diff
y.range[ 1 ] <- y.range[ 1 ] - y.diff
y.range[ 2 ] <- y.range[ 2 ] + y.diff
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
grd <- krig.grd <- gam.grd <-
expand.grid(
intptlon = seq( x.range[ 1 ] , x.range[ 2 ] , length = grid.length ) ,
intptlat = seq( y.range[ 1 ] , y.range[ 2 ] , length = grid.length )
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# along your rectangular grid,
# what are the predicted values of
# each veteran era category
krig.grd$gulf <- predict( krig.fit.gulf , krig.grd[ , 1:2 ] )
krig.grd$vietnam <- predict( krig.fit.vietnam , krig.grd[ , 1:2 ] )
krig.grd$other <- predict( krig.fit.other , krig.grd[ , 1:2 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
gam.grd$gulf <- predict( gam.gulf , gam.grd[ , 1:2 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
gam.grd$vietnam <- predict( gam.vietnam , gam.grd[ , 1:2 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
gam.grd$other <- predict( gam.other , gam.grd[ , 1:2 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# remember that these values have been re-centered
# to show how disproportionate they are from the statewide averages.
# therefore, negative values are possible.
sapply( krig.grd , summary )
sapply( gam.grd , summary )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# what we're really hoping for is that
# the overall mean averages out to zero
sum( sapply( gam.grd , summary )[ 4 , 3:5 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# since the gulf, vietnam, and other shares each sum to one,
# the three disproportionate shares sum to zero by construction,
# so these predictions at each point should also approximately sum to zero
summary( rowSums( krig.grd[ , 3:5 ] ) )
summary( rowSums( gam.grd[ , 3:5 ] ) )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # end of step 7 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # #
# # step 8: limit information and color # #
# # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # warning # # # warning # # # # # # warning # # # # # # warning # # # # # # warning # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# if your data are not binary, then by mapping with a single image, you lose clarity #
# if you have three levels of information and you generate two maps, you can get an idea #
# about the entire distribution of the variable. if you attempt encoding three levels or #
# more into a single map, you will explode. just kidding, but you will lose info #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # warning # # # warning # # # # # # warning # # # # # # warning # # # # # # warning # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
library(scales)
# from among the three categories, find the maximum disproportionate share
krig.grd$svccat <- c( 'gulf' , 'vietnam' , 'other' )[ apply( krig.grd[ , 3:5 ] , 1 , which.max ) ]
# save only that max
krig.grd$statistic <- apply( krig.grd[ , 3:5 ] , 1 , max )
# it's important to note that i've thrown out a lot of information here
krig.grd <- krig.grd[ , c( 'intptlon' , 'intptlat' , 'statistic' , 'svccat' ) ]
# do any points not make sense?
summary( krig.grd$statistic )
# yup, the minimum is below zero.
krig.grd$statistic <- pmax( 0 , krig.grd$statistic )
# from among the three categories, find the maximum disproportionate share
gam.grd$svccat <- c( 'gulf' , 'vietnam' , 'other' )[ apply( gam.grd[ , 3:5 ] , 1 , which.max ) ]
# save only that max
gam.grd$statistic <- apply( gam.grd[ , 3:5 ] , 1 , max )
# it's important to note that i've thrown out a lot of information here
gam.grd <- gam.grd[ , c( 'intptlon' , 'intptlat' , 'statistic' , 'svccat' ) ]
# again, do any points not make sense?
summary( gam.grd$statistic )
# another point below zero.
gam.grd$statistic <- pmax( 0 , gam.grd$statistic )
# our complex sample survey-computed statistics rely on categories,
# but the final map only shows the _highest_ disproportionate item
# from each puma. for example,
# puma 300 is slightly disproportionately more gulf veterans
# and it's also near evenly-split between being
# slightly disproportionately less vietnam vets and
# slightly disproportionately less other era vets
# puma 101 is disproportionately more gulf vets too,
# but it has very heavily disproportionately fewer vietnam vets
# and has close to state-average veterans from other eras.
sas
# only the "disproportionately more" share variable gets retained
# in these predictions. all other information gets thrown away.
# this is the nature of mapping categorical variables
# if you are intent on showing a multi-color gradient with all information,
# you can use the rgb() function, but fair warning:
# the values mush together quickly and your map will probably look like ass.
# i tried building color gradients to map multi-dimensional categorical values
# like the multi-category values in
ps
# but on the color gradient, the red/green/blue values on the palette tend to mush together.
# for example, on this plot right here..
plot( 1:5 , rep( 1 , 5 ) , cex = 3 , pch = 16 , col = mapply( rgb , ps[ , 2 ] , ps[ , 3 ] , ps[ , 4 ] ) )
# red is gulf veterans, green is vietnam veterans, blue is other veterans.
# the colors end up just looking drab.
# even when re-scaled..
rsps <- apply( ps[ , 2:4 ] , 2 , rescale )
# ..the points with high relative rates in two categories (because they're the lowest in the third)
# have a lot of mixture (puma 300) and are therefore indecipherable. what is the color brown here?
plot( 1:5 , rep( 1 , 5 ) , cex = 3 , pch = 16 , col = mapply( rgb , rsps[ , 1 ] , rsps[ , 2 ] , rsps[ , 3 ] ) )
# high vietnam era and also high gulf era service.
text( 1:5 , rep( 1.2 , 5 ) , ps[ , 1 ] )
# multi-dimensional categorical variable coloring is a nightmare.
# you have to simplify it.
# simplifying it means throwing out information.
# now where were we?
library(RColorBrewer)
# draw three gradients
tg <-
lapply(
brewer.pal( 3 , 'Set1' ) ,
function( z ) colorRampPalette( c( 'white' , z ) )( 101 )
)
# check out each of these three colors, ramped from white to intense.
plot( rep( 0:100 , 3 ) , rep( c( -1 , 0 , 1 ) , each = 101 ) , col = unlist( tg ) , pch = 16 , cex = 3 )
# draw an alternate three gradients
# that start at ~20% ( that is: 25 / 125 )
# and also use a different palette from colorbrewer2.org
tag <-
lapply(
brewer.pal( 3 , 'Dark2' ) ,
function( z ) colorRampPalette( c( 'white' , z ) )( 125 )[ 25:125 ]
)
# check out each of these three colors, ramped from a light tint to intense.
plot( rep( 0:100 , 3 ) , rep( c( -1 , 0 , 1 ) , each = 101 ) , col = unlist( tag ) , pch = 16 , cex = 3 )
# # rescale both of the interpolated grids
krig.grd$statistic <- krig.grd$statistic * ( 1 / max( krig.grd$statistic ) )
gam.grd$statistic <- gam.grd$statistic * ( 1 / max( gam.grd$statistic ) )
# note that the re-scaling gets done across all categories,
# and not individually within each category.
# add the hex color identifier
# (the gradients are indexed 1 through 101, so lower-bound the index at one
# in case the statistic rounds all the way down to zero)
krig.grd$color.value <-
	ifelse( krig.grd$svccat == 'gulf' , tg[[1]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	ifelse( krig.grd$svccat == 'vietnam' , tg[[2]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	ifelse( krig.grd$svccat == 'other' , tg[[3]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	NA ) ) )
# awwwwwwww yeah, something's happening now.
plot( krig.grd$intptlon , krig.grd$intptlat , col = krig.grd$color.value , pch = 16 , cex = 3 )
# add the alternate hex color identifier
# (same one-based index guard as above)
krig.grd$alt.color <-
	ifelse( krig.grd$svccat == 'gulf' , tag[[1]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	ifelse( krig.grd$svccat == 'vietnam' , tag[[2]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	ifelse( krig.grd$svccat == 'other' , tag[[3]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	NA ) ) )
# that looks a bit better to me
plot( krig.grd$intptlon , krig.grd$intptlat , col = krig.grd$alt.color , pch = 16 , cex = 3 )
# lower-bound the alternate color to remove the white lines
krig.grd$bound.color <-
ifelse( krig.grd$svccat == 'gulf' , tag[[1]][ pmax( 5 , round( krig.grd$statistic * 100 ) ) ] ,
ifelse( krig.grd$svccat == 'vietnam' , tag[[2]][ pmax( 5 , round( krig.grd$statistic * 100) ) ] ,
ifelse( krig.grd$svccat == 'other' , tag[[3]][ pmax( 5 , round( krig.grd$statistic * 100 ) ) ] ,
NA ) ) )
# that's smoothing by hand for you.
plot( krig.grd$intptlon , krig.grd$intptlat , col = krig.grd$bound.color , pch = 16 , cex = 3 )
# put that color band on the `gam.grd` data.frame as well
gam.grd$bound.color <-
ifelse( gam.grd$svccat == 'gulf' , tag[[1]][ pmax( 5 , round( gam.grd$statistic * 100 ) ) ] ,
ifelse( gam.grd$svccat == 'vietnam' , tag[[2]][ pmax( 5 , round( gam.grd$statistic * 100) ) ] ,
ifelse( gam.grd$svccat == 'other' , tag[[3]][ pmax( 5 , round( gam.grd$statistic * 100 ) ) ] ,
NA ) ) )
# # end of step 8 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # #
# # step 9: ggplot and choose options # #
# # # # # # # # # # # # # # # # # # # # #
library(ggplot2)
library(mapproj)
library(scales)
# initiate the krige-based plot
# pass the hex codes through directly: converting them to a factor risks
# having them read as integer palette codes rather than colors, and the
# scale_fill_manual() call did nothing here since no fill aesthetic was mapped
krg.plot <-
	ggplot( data = krig.grd , aes( x = intptlon , y = intptlat ) ) +
	geom_point( shape = 15 , colour = krig.grd$bound.color )
# initiate the gam-based plot
gam.plot <-
	ggplot( data = gam.grd , aes( x = intptlon , y = intptlat ) ) +
	geom_point( shape = 15 , colour = gam.grd$bound.color )
# view both grids!
krg.plot
gam.plot
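# an alternative sketch (`krg.plot.alt` is just an illustrative name):
# map the precomputed hex codes through aes() and scale_colour_identity() ,
# which tells ggplot2 to use the color values verbatim
krg.plot.alt <-
	ggplot( krig.grd , aes( x = intptlon , y = intptlat , colour = bound.color ) ) +
	geom_point( shape = 15 ) +
	scale_colour_identity()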
# initiate the entire plot
the.plot <-
# choose only one of the two interpolation grids
krg.plot +
# gam.plot +
# blank out the legend and axis labels
theme(
legend.position = "none" ,
axis.title.x = element_blank() ,
axis.title.y = element_blank()
) +
xlab( "" ) + ylab( "" ) +
# force the x and y axis limits at the shape of the state and don't do anything special for off-map values
scale_x_continuous( limits = c( -191 , -127 ) , breaks = NULL , oob = squish ) +
# since we're going to add lots of surrounding-area detail!
scale_y_continuous( limits = c( 50 , 73 ) , breaks = NULL , oob = squish ) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
axis.ticks = element_blank()
)
# print the plot to the screen
the.plot
# this is the bottom layer.
# initiate an aleutian islands-focused wrap-around function
s360 <- function( z ){ z[ z$long > 0 , 'long' ] <- z[ z$long > 0 , 'long' ] - 360 ; z }
# # alaskan state borders # #
# convert the alaskan borders to longlat,
# prepare for ggplot2 with `fortify`
# wrap edge points around
ab <- s360( fortify( spTransform( alaska.borders , CRS( "+proj=longlat" ) ) ) )
# store this information in a layer
state.border.layer <- geom_path( data = ab , aes( x = long , y = lat , group = group ) , colour = 'darkgrey' )
# plot the result
the.plot + state.border.layer
# # alaskan main roads # #
# convert the alaskan roads to longlat,
# prepare for ggplot2 with `fortify`
# wrap edge points around
akr <- s360( fortify( spTransform( asf[[3]] , CRS( "+proj=longlat" ) ) ) )
# store this information in a layer
state.roads.layer <- geom_path( data = akr , aes( x = long , y = lat , group=group ) , colour = 'darkred' )
# plot the result
the.plot + state.border.layer + state.roads.layer
# # end of step 9 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # #
# # step 10: project, blank, and save # #
# # # # # # # # # # # # # # # # # # # # #
library(ggplot2)
library(scales)
library(raster)
library(plyr)
library(rgeos)
# exclude outer alaska if you hate the wilderness or something
the.plot + state.border.layer + coord_cartesian( xlim = c( -155 , max( x$intptlon ) ) , ylim = c( min( x$intptlat ) , 70 ) )
# distort the map with simple latitude/longitude scaling
the.plot + state.border.layer + coord_fixed( 2.5 )
# this looks crappy, who knows what it is
the.plot + state.border.layer + coord_equal()
# check out a bunch of other options #
the.plot + state.border.layer + coord_map( project = "cylequalarea" , mean( x$intptlat ) )
# here's the one that makes the most sense for alaska
the.plot + state.border.layer + coord_map( project = "conic" , mean( x$intptlat ) , orientation = c( 90 , 0 , -141 ) )
# see ?mapproject and the ?coord_* functions for a zillion alternatives
# store this projection, but not the state border, into `the.plot`
the.plot <- the.plot + coord_map( project = "conic" , mean( x$intptlat ) , orientation = c( 90 , 0 , -141 ) )
# force the difference shapefile's projection
proj4string( ak.shp.diff ) <- "+init=epsg:2163"
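# note: proj4string<- only *declares* the coordinate system the object is
# already in (epsg:2163, courtesy of the daiacbsf import function above);
# unlike spTransform, it does not move any coordinates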
# initiate the outside blanking layer
outside <- s360( fortify( spTransform( ak.shp.diff , CRS( "+proj=longlat" ) ) ) )
# fix islands piecing together
outside2 <- ddply( outside , .( piece ) , function( x ) rbind( x , outside[ 1 , ] ) )
# convert this fortified object to a ggplot layer
outside.layer <- geom_polygon( data = outside2 , aes( x = long , y = lat , group = id ) , fill = 'white' )
# plot this -- the layer doesn't work, does it?
the.plot + outside.layer
# five points need to change so we have a real bounding box.
subset( outside , lat < 45 | lat > 75 | long < -190 | long > -125 )
# move all of them counter-clockwise by hand
outside[ outside$order %in% c( 1 , 5 ) , 'long' ] <- -116.6568
# outside[ outside$order %in% c( 1 , 5 ) , 'lat' ] <- 20
# outside[ outside$order %in% 4 , 'long' ] <- -220
outside[ outside$order %in% 4 , 'lat' ] <- 37.56767
outside[ outside$order %in% 3 , 'long' ] <- -195.4295
# outside[ outside$order %in% 3 , 'lat' ] <- 100
# outside[ outside$order %in% 2 , 'long' ] <- -100
outside[ outside$order %in% 2 , 'lat' ] <- 79.36447
# fix islands piecing together
outside2 <- ddply( outside , .( piece ) , function( x ) rbind( x , outside[ 1 , ] ) )
# convert this fortified object to a ggplot layer
outside.layer <- geom_polygon( data = outside2 , aes( x = long , y = lat , group = id ) , fill = 'white' )
# plot this.
the.plot + outside.layer
# that's not so bad, i guess.
# i don't care for the state border layer,
# but if you want the state border layer,
# use this save line:
final.plot <- the.plot + outside.layer + state.border.layer
# otherwise use this save line:
# final.plot <- the.plot + outside.layer
# you can airbrush the outside blue border
# in microsoft paint or something
# if you want, right? like a boss.
# save the file to your current working directory
ggsave(
"2013 alaskan veteran service eras.png" ,
plot = final.plot ,
scale = 3
)
# happy?
# # end of step 10 # #
# # # # # # # # # # # #