La_rs bughunt (@ajdamico, created February 22, 2016)
# # # # # # # # # # # # # # # # #
# # set the working directory # #
# # # # # # # # # # # # # # # # #
# setwd( "C:/My Directory/SWMAP/" )
# # # # # # # # # # # # # # # #
# # example survey data set # #
# # # # # # # # # # # # # # # #
# american community survey
# # # # # # # # # # # # # # # # # # # # #
# # different from other maps because # #
# # # # # # # # # # # # # # # # # # # # #
# displays a non-ordinal categorical variable
# crosses the international date line
# # # # # # # # # # # # # # # # # #
# # smallest level of geography # #
# # # # # # # # # # # # # # # # # #
# state, public use microdata areas
# # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # asdfree.com blog post for this survey microdata # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # #
# http://www.asdfree.com/search/label/american%20community%20survey%20%28acs%29
# # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # r code repository for setup and analysis examples # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# https://github.com/ajdamico/asdfree/tree/master/American%20Community%20Survey
# # # # # # # # # # # # #
# # value of interest # #
# # # # # # # # # # # # #
# disproportionate shares of veterans of foreign wars (categorical)
# # # # # # #
# # flaws # #
# # # # # # #
# map presents shares that are *disproportionately* higher than the statewide average.
# in absolute numbers, gulf war veterans outnumber other veteran categories in four of the five pumas.
# # # # # # # # # # # # # # # # # # # # #
# # step 1: load the survey microdata # #
# # # # # # # # # # # # # # # # # # # # #
# remove the # in order to run this install.packages line only once
# install.packages( c( "MonetDB.R" , "MonetDBLite" ) , repos=c("http://dev.monetdb.org/Assets/R/", "http://cran.rstudio.com/"))
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
library(downloader)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# download the 2013 american community survey microdata onto the local disk
# path.to.7z <- "7za" # # only macintosh and *nix users need this line
single.year.datasets.to.download <- 2013
three.year.datasets.to.download <- NULL
five.year.datasets.to.download <- NULL
source_url( "https://raw.githubusercontent.com/ajdamico/asdfree/master/American%20Community%20Survey/download%20all%20microdata.R" , prompt = FALSE , echo = TRUE )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # end of step 1 # #
# # # # # # # # # # #
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # step 2: conduct your analysis of interest at the smallest geography allowed # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
library(survey)
library(MonetDB.R)
library(MonetDBLite)
library(scales)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# connect to the database
dbfolder <- paste0( getwd() , "/MonetDB" )
db <- dbConnect( MonetDBLite() , dbfolder )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # # # run your analysis commands # # # #
# subset the design to only alaska before actually constructing the design
acs.alaska <- dbGetQuery( db , 'select * from acs2013_1yr_m where st = 2' )
# note: this is not allowed for taylor-series linearized designs
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# disconnect from the current monet database
dbDisconnect( db )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# construct a svrepdesign object
alaska.design <-
svrepdesign(
weight = ~pwgtp ,
repweights = 'pwgtp[1-9]' ,
scale = 4 / 80 ,
rscales = rep( 1 , 80 ) ,
mse = TRUE ,
data = acs.alaska
)
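# the scale= and rscales= arguments above encode the acs's successive
# difference replication (sdr) variance formula:
# Var( theta ) = ( 4 / 80 ) * the sum of the 80 squared deviations of the
# replicate estimates from the full-sample estimate (that's what mse = TRUE does).
# here's a quick hand check of that formula against the survey package,
# a sketch assuming the pwgtp1 - pwgtp80 replicate weight columns:
v <- as.numeric( acs.alaska$vps > 0 )
full <- weighted.mean( v , acs.alaska$pwgtp , na.rm = TRUE )
reps <- sapply( 1:80 , function( i ) weighted.mean( v , acs.alaska[ , paste0( 'pwgtp' , i ) ] , na.rm = TRUE ) )
# this standard error..
sqrt( ( 4 / 80 ) * sum( ( reps - full ) ^ 2 ) )
# ..should essentially match this one
SE( svymean( ~ I( as.numeric( vps > 0 ) ) , alaska.design , na.rm = TRUE ) )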
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# create a denominator variable indicating any period of service
alaska.design <- update( alaska.design , vet = as.numeric( vps > 0 ) )
# create a categorical variable indicating era of service
alaska.design <-
update(
alaska.design ,
gulf = as.numeric( vps %in% 1:5 ) ,
vietnam = as.numeric( vps %in% 6:8 ) ,
other = as.numeric( vps %in% 9:15 )
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# statewide era of service shares
sw <- svyratio( ~ gulf + vietnam + other , ~ vet , alaska.design , na.rm = TRUE )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# puma-specific era of service shares
ps <- svyby( ~ gulf + vietnam + other , denominator = ~ vet , by = ~ puma , alaska.design , svyratio , na.rm = TRUE )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# find the disproportionate shares
ds <- ps[ , 2:4 ] - matrix( coef( sw ) , 5 , 3 , byrow = T )
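# the same calculation without hardcoding the 5 x 3 dimensions (a sketch):
# ds <- sweep( ps[ , 2:4 ] , 2 , coef( sw ) , "-" )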
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# so look at this table.
ds
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# pumas 101 and 300 have veterans that disproportionately served during the gulf wars (up to the present)
# pumas 102 and 200 have veterans that disproportionately served during the vietnam war
# puma 400 has veterans that disproportionately served during another era
# hold on to these disproportionate shares and the standard errors of the original ratios.
alaska.pumas <- cbind( ps[ 1 ] , ds , ps[ , 5:7 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# note that the standard error of the ratio statistic
# and the standard error of the difference between
# the statewide and puma-level statistics
# are probably not the same. well, i'm sure of it.
# if you're an academic statistician, you might be mad at me
# for making this half-assed calculation right here.
# github makes it easy to patch and edit and update other people's code.
# go for it ;)
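# for the record, here's roughly what a more defensible calculation of one
# cell might look like (a sketch, not part of the original workflow):
# estimate (puma-level share minus statewide share) inside a single function,
# so the replication machinery captures the covariance between the two.
# `101` and `gulf` are just one illustrative puma/category pair.
withReplicates(
	alaska.design ,
	function( w , data ){
		sw.share <- sum( w * data$gulf , na.rm = TRUE ) / sum( w * data$vet , na.rm = TRUE )
		ip <- data$puma %in% 101
		puma.share <- sum( w[ ip ] * data$gulf[ ip ] , na.rm = TRUE ) / sum( w[ ip ] * data$vet[ ip ] , na.rm = TRUE )
		# the returned statistic is the difference itself,
		# so the reported SE is the standard error of the difference
		puma.share - sw.share
	}
)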
# remove those slashvets from the column names
names( alaska.pumas ) <- gsub( "/vet" , "" , names( alaska.pumas ) )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# these are the small area statistics to be mapped
print( alaska.pumas )
# the standard errors are a measure of precision;
# their inverses will serve as the mapping weights
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# make this object easier to type
sas <- alaska.pumas
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # end of step 2 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # step 3: download and import necessary geographic crosswalks # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
library(downloader)
library(maptools)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# load the download_cached and related functions
# to prevent re-downloading of files once they've been downloaded.
source_url(
"https://raw.github.com/ajdamico/asdfree/master/Download%20Cache/download%20cache.R" ,
prompt = FALSE ,
echo = FALSE
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# create a temporary file containing the census bureau's
# 2010 census tract to 2010 puma crosswalk
# then download the file.
ctpxw.tf <- tempfile()
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
download_cached(
"http://www2.census.gov/geo/docs/maps-data/data/rel/2010_Census_Tract_to_2010_PUMA.txt" ,
ctpxw.tf ,
mode = 'wb'
)
# note: to re-download a file from scratch, add the parameter usecache = FALSE
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# import this csv file into an R data.frame object
ctpxw <- read.csv( ctpxw.tf )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# match the column names of sf1 and of the `sas` output
names( ctpxw ) <- c( 'state' , 'county' , 'tract' , 'puma' )
# immediately limit this to alaskan census tracts
ak.ctpxw <- subset( ctpxw , state == 2 )
# clear up RAM
rm( ctpxw ) ; gc()
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# create a temporary file containing the census bureau's
# 2010 census summary file #1 for alaska
# then download the file.
sf1ak.tf <- tempfile()
download_cached(
"ftp://ftp2.census.gov/census_2010/04-Summary_File_1/Alaska/ak2010.sf1.zip" ,
sf1ak.tf ,
mode = 'wb'
)
# note: to re-download a file from scratch, add the parameter usecache = FALSE
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# unzip the summary file #1 files
sf1ak.uz <- unzip( sf1ak.tf , exdir = tempdir() )
# file layout from http://www.census.gov/prod/cen2010/doc/sf1.pdf#page=18
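# (negative widths in read.fwf mean "skip that many characters")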
sf1ak <- read.fwf( sf1ak.uz[ grep( "akgeo2010" , sf1ak.uz ) ] , c( -8 , 3 , -16 , 2 , 3 , -22 , 6 , 1 , 4 , -253 , 9 , -9 , 11 , 12 ) )
# add column names matching the census bureau, so it's easy to read
names( sf1ak ) <- c( "sumlev" , "state" , "county" , "tract" , "blkgrp" , "block" , "pop100" , "intptlat" , "intptlon" )
# summary level 101 has census tracts and census blocks
sf1ak.101 <- subset( sf1ak , sumlev == "101" )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# merge these files together
sf1ak.101 <- merge( sf1ak.101 , ak.ctpxw )
# the number of records and population sums serve
# as a check to confirm that this merge worked:
# one record per census block in alaska. see? same number.
nrow( sf1ak.101 )
# https://www.census.gov/geo/maps-data/data/tallies/census_block_tally.html
# and guess what? the total alaska population matches as well.
sum( sf1ak.101$pop100 )
# http://quickfacts.census.gov/qfd/states/02000.html
# clear up RAM
rm( sf1ak ) ; gc()
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# so now we have a data.frame object with
# one record per census block,
# and also with the geography (puma)
# that matches the american community survey
head( sf1ak.101 )
# and guess what?
# we've now got the census 2010 weighted populations (field pop100)
# and also each census block's centroid latitude & longitude (fields intptlat + intptlon)
# # end of step 3 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # step 4: merge the results of your survey analysis with the small-area geography # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# confirm that we've created all possible geographies correctly.
# the number of records in our small area statistics..
sas.row <- nrow( sas )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# ..should equal the number of unique-match-merged records..
mrow <- nrow( merge( unique( sf1ak.101[ "puma" ] ) , sas ) )
# ..and it does/they do.
stopifnot( sas.row == mrow )
# now the census block-level alaska census data *could* merge if you wanted it to.
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# but you don't. yet.
# the standard errors (the `se.` fields) are measures of precision.
print( sas )
# the smaller the standard error, the more confident you should be
# that the estimate at a particular geography is correct.
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# so invert them. you heard me. invert them.
sas$invse.gulf <- 1 / sas$se.gulf
sas$invse.vietnam <- 1 / sas$se.vietnam
sas$invse.other <- 1 / sas$se.other
# a smaller standard error indicates more precision.
# for our purposes, precision can be considered weight! #
# now we've got the weight that we should give each of our estimates #
# distribute that weight across all census blocks #
# aggregate the 2010 census block populations to the geographies that you have.
popsum <- aggregate( sf1ak.101$pop100 , by = ( sf1ak.101[ "puma" ] ) , sum )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# make the column name meaningful
names( popsum )[ names( popsum ) == 'x' ] <- 'popsum'
# merge the popsum onto the sasfile
sas <- merge( sas , popsum )
# now. merge
# the disproportionate veteran era in each puma (the variable of interest)
# the inverted standard errors (the total weight of the broad geography)
# the population sum (the total population of all census blocks that are part of that geography)
x <- merge( sf1ak.101 , sas )
# confirm no record loss
stopifnot( nrow( x ) == nrow( sf1ak.101 ) )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# (this is the fun part)
# calculate the weight at each census block
x$weight.gulf <- x$invse.gulf * ( x$pop100 / x$popsum )
x$weight.vietnam <- x$invse.vietnam * ( x$pop100 / x$popsum )
x$weight.other <- x$invse.other * ( x$pop100 / x$popsum )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# note that the weight of all census blocks put together
# sums to the `invse` on the original analysis file
# (compare with all.equal rather than == to allow for floating point error)
stopifnot( isTRUE( all.equal( sum( x$weight.gulf ) , sum( sas$invse.gulf ) ) ) )
stopifnot( isTRUE( all.equal( sum( x$weight.vietnam ) , sum( sas$invse.vietnam ) ) ) )
stopifnot( isTRUE( all.equal( sum( x$weight.other ) , sum( sas$invse.other ) ) ) )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# remove records with zero population across all three measures
x <- subset( x , weight.gulf > 0 | weight.vietnam > 0 | weight.other > 0 )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# scale all weights so that they average to one
x$weight.gulf <- x$weight.gulf / mean( x$weight.gulf )
x$weight.vietnam <- x$weight.vietnam / mean( x$weight.vietnam )
x$weight.other <- x$weight.other / mean( x$weight.other )
# you're done preparing your data.
# keep only the columns you need.
x <- x[ , c( 'gulf', 'vietnam' , 'other' , 'weight.gulf' , 'weight.vietnam' , 'weight.other' , 'intptlat' , 'intptlon' ) ]
# pop quiz: which states are the furthest north, east, south, west?
# if you guessed alaska, maine, hawaii, alaska, you are wrong!
# the answer is alaska, alaska, hawaii, alaska.
# a few of the aleutians cross the international date line.
# do you want to keep the edges of the aleutian islands in your map?
# of course you do! here's an ultra-simple recode to keep them gridded together.
x <- transform( x , intptlon = ifelse( intptlon > 0 , intptlon - 360 , intptlon ) )
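# for example, a centroid near attu island at longitude ~173 east becomes ~ -187,
# so it plots just west of -180 instead of wrapping to the far right of the grid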
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # end of step 4 # #
# # # # # # # # # # #
# # # # # # # # # # # #
# # step 5: outline # #
# # # # # # # # # # # #
library(maptools)
library(raster)
library(rgeos)
library(rgdal)
library(ggplot2)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# make a character vector containing the shapefiles to download
shftd <-
c(
# download the clipped alaska public use microdata area map, described
# https://www.census.gov/geo/maps-data/maps/2010puma/st02_ak.html
'http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_02_puma10_500k.zip' ,
# download the clipped nationwide state outlines
'http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_us_state_500k.zip' ,
# download the roads in alaska
'http://www2.census.gov/geo/tiger/TIGER2013/PRISECROADS/tl_2013_02_prisecroads.zip'
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# initiate a function to download and import all census bureau shapefiles
daiacbsf <-
function( fn , myproj = "+init=epsg:2163" ){
tf <- tempfile()
# # note: to re-download a file from scratch, add the parameter usecache = FALSE # #
download_cached( fn , tf , mode = 'wb' )
# unzip the downloaded file to a temporary directory
shp.uz <- unzip( tf , exdir = tempdir() )
# figure out which filename ends with "shp"
sfname <- grep( 'shp$' , shp.uz , value = TRUE )
# read in the shapefile, using the correct layer
sf <- readOGR( sfname , layer = gsub( "\\.shp" , "" , basename( sfname ) ) )
# project this shapefile immediately
# this projection (and a few others) keeps
# the aleutian islands that cross the
# international date line easy to work with.
spTransform( sf , CRS( myproj ) )
}
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# run all downloads at once, store the result in a list.
asf <- lapply( shftd , daiacbsf )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# pull out the clipped state borders of alaska only
alaska.borders <- subset( asf[[2]] , STATEFP == '02' )
# plot as-is. see how the aleutians screw up the map?
plot( alaska.borders )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# add puma boundaries
plot( asf[[1]] , add = TRUE )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# refresh the map with state borders only
plot( alaska.borders )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# add roads
plot( asf[[3]] , add = TRUE , col = 'red' )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# draw a rectangle 15% bigger than the original state
ak.shp.blank <- as( 1.3 * extent( alaska.borders ) , "SpatialPolygons" )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# calculate the difference between the rectangle and the actual shape
ak.shp.diff <- gDifference( ak.shp.blank , alaska.borders )
# this will be used to cover up points outside of alaska's state borders
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# this box will later blank out the surrounding area
plot( ak.shp.diff )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # end of step 5 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # #
# # step 6: tie knots and krige # #
# # # # # # # # # # # # # # # # # #
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
library(sqldf)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # warning warning # # # # warning warning # #
# alaska has a vast geography and highly skewed population centers
# kriging functions might not converge. that's why there are other options ;)
# # warning warning # # # # warning warning # #
# how many knots should you make? #
# knots are the computationally-intensive part of this process,
# choose as many as your computer and your patience can handle.
# you should aim for between 100 and 999 knots,
# but numbers closer to 1,000 will overload smaller computers
# you could let the `fields` package attempt to guess knots for you,
# xknots <- cover.design( cbind( x$intptlon , x$intptlat ) , 100 )$design
# but with census microdata, you've already got easy access to a relevant geographic grouping
# the sqldf() function doesn't like `.` in data.frame object names
sf1s <- sf1ak.101
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# exactamundo same transform operation as you saw previously on `x`
sf1s <- transform( sf1s , intptlon = ifelse( intptlon > 0 , intptlon - 360 , intptlon ) )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# within each county x census tract
# calculate the population-weighted mean of the coordinates
ct.knots <-
sqldf(
"select
county , tract ,
sum( pop100 ) as pop100 ,
sum( pop100 * intptlon ) / sum( pop100 ) as intptlon ,
sum( pop100 * intptlat ) / sum( pop100 ) as intptlat
from sf1s
group by
county , tract"
)
# note: averaging longitudes breaks for coordinates that cross the
# international date line. in the united states, only alaska's aleutian islands
# do this, and we're mapping alaska, aren't we? good thing we fixed it above, huh?
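# if you'd rather not depend on sqldf, the same population-weighted centroids
# can be cross-checked in base R (a sketch; `ct.knots.check` is just an
# illustrative name). weighted.mean() returns NaN for zero-population tracts.
ct.knots.check <-
	do.call(
		rbind ,
		lapply(
			split( sf1s , interaction( sf1s$county , sf1s$tract , drop = TRUE ) ) ,
			function( d )
				data.frame(
					county = d$county[ 1 ] ,
					tract = d$tract[ 1 ] ,
					pop100 = sum( d$pop100 ) ,
					intptlon = weighted.mean( d$intptlon , d$pop100 ) ,
					intptlat = weighted.mean( d$intptlat , d$pop100 )
				)
		)
	)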
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# interpolation option one #
library(fields)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
krig.fit.gulf <-
Krig(
cbind( x$intptlon , x$intptlat ) ,
Y = x$gulf ,
weights = x$weight.gulf ,
knots = cbind( ct.knots$intptlon , ct.knots$intptlat )
# if you prefer to use cover.design, all you'd need is this knots= line instead:
# knots = xknots
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
krig.fit.vietnam <-
Krig(
cbind( x$intptlon , x$intptlat ) ,
x$vietnam ,
weights = x$weight.vietnam ,
knots = cbind( ct.knots$intptlon , ct.knots$intptlat )
# if you prefer to use cover.design, all you'd need is this knots= line instead:
# knots = xknots
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
krig.fit.other <-
Krig(
cbind( x$intptlon , x$intptlat ) ,
x$other ,
weights = x$weight.other ,
knots = cbind( ct.knots$intptlon , ct.knots$intptlat )
# if you prefer to use cover.design, all you'd need is this knots= line instead:
# knots = xknots
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# that is: what is the (weighted) relationship between
# your variable of interest (veteran service eras) and
# the x/y points on a grid?
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# check this out!
surface( krig.fit.gulf )
surface( krig.fit.vietnam )
surface( krig.fit.other )
# you're almost there!
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# interpolation option two #
library(mgcv)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
gam.gulf <-
gam(
gulf ~ s( intptlon , intptlat ) ,
weights = weight.gulf ,
data = x
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
gam.vietnam <-
gam(
vietnam ~ s( intptlon , intptlat ) ,
weights = weight.vietnam ,
data = x
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
gam.other <-
gam(
other ~ s( intptlon , intptlat ) ,
weights = weight.other ,
data = x
)
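# note: s( intptlon , intptlat ) fits an isotropic thin plate regression
# spline by default. if the surface looks oversmoothed, a larger basis
# dimension (for example s( intptlon , intptlat , k = 100 )) buys more
# wiggliness at the cost of computation. a quick diagnostic:
summary( gam.gulf )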
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # end of step 6 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # #
# # step 7: make a grid and predict # #
# # # # # # # # # # # # # # # # # # # #
# use as fine a grid as your computer can handle
grid.length <- 750
# # note: smaller grids will render faster
# # (so they're better if you're just playing around)
# # but larger grids will prevent your final plot from
# # being too pixelated, even when zooming in
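# (a 750 x 750 grid works out to grid.length ^ 2 = 562,500 prediction points per statistic)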
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
x.range <- c( min( x$intptlon ) , max( x$intptlon ) )
y.range <- c( min( x$intptlat ) , max( x$intptlat ) )
# add twenty percent on each side
x.diff <- abs( x.range[ 2 ] - x.range[ 1 ] ) * 0.2
y.diff <- abs( y.range[ 2 ] - y.range[ 1 ] ) * 0.2
x.range[ 1 ] <- x.range[ 1 ] - x.diff
x.range[ 2 ] <- x.range[ 2 ] + x.diff
y.range[ 1 ] <- y.range[ 1 ] - y.diff
y.range[ 2 ] <- y.range[ 2 ] + y.diff
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
grd <- krig.grd <- gam.grd <-
expand.grid(
intptlon = seq( x.range[ 1 ] , x.range[ 2 ] , length = grid.length ) ,
intptlat = seq( y.range[ 1 ] , y.range[ 2 ] , length = grid.length )
)
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# along your rectangular grid,
# what are the predicted values of
# each veteran era category
krig.grd$gulf <- predict( krig.fit.gulf , krig.grd[ , 1:2 ] )
krig.grd$vietnam <- predict( krig.fit.vietnam , krig.grd[ , 1:2 ] )
krig.grd$other <- predict( krig.fit.other , krig.grd[ , 1:2 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
gam.grd$gulf <- predict( gam.gulf , gam.grd[ , 1:2 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
gam.grd$vietnam <- predict( gam.vietnam , gam.grd[ , 1:2 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
gam.grd$other <- predict( gam.other , gam.grd[ , 1:2 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# remember that these values have been re-centered
# to show how disproportionate they are from the statewide averages.
# therefore, negative values are possible.
sapply( krig.grd , summary )
sapply( gam.grd , summary )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# what we're really hoping for is that
# the overall mean averages out to zero
sum( sapply( gam.grd , summary )[ 4 , 3:5 ] )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# since the gulf, vietnam, and other shares each sum to one,
# the three disproportionate shares sum to zero by construction,
# so these predictions at each point should also approximately sum to zero
summary( rowSums( krig.grd[ , 3:5 ] ) )
summary( rowSums( gam.grd[ , 3:5 ] ) )
xyz <- structure(c(0.00251355321405019, -0.000589785531216647, -0.000172411748626129, -0.000589785531217227, 0.000897505637785858, -0.000714600035538855, -0.000172411748626269, -0.000714600035538766, 0.00123946691634644), .Dim = c(3L, 3L))
.Internal(La_rs(xyz,FALSE))
# # end of step 7 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # #
# # step 8: limit information and color # #
# # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # warning # # # warning # # # # # # warning # # # # # # warning # # # # # # warning # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# if your data are not binary, then by mapping with a single image, you lose clarity #
# if you have three levels of information and you generate two maps, you can get an idea #
# about the entire distribution of the variable. if you attempt encoding three levels or #
# more into a single map, you will explode. just kidding, but you will lose info #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # warning # # # warning # # # # # # warning # # # # # # warning # # # # # # warning # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
library(scales)
# from among the three categories, find the maximum disproportionate share
krig.grd$svccat <- c( 'gulf' , 'vietnam' , 'other' )[ apply( krig.grd[ , 3:5 ] , 1 , which.max ) ]
# save only that max
krig.grd$statistic <- apply( krig.grd[ , 3:5 ] , 1 , max )
# it's important to note that i've thrown out a lot of information here
krig.grd <- krig.grd[ , c( 'intptlon' , 'intptlat' , 'statistic' , 'svccat' ) ]
# do any points not make sense?
summary( krig.grd$statistic )
# yup, the minimum is below zero.
krig.grd$statistic <- pmax( 0 , krig.grd$statistic )
# from among the three categories, find the maximum disproportionate share
gam.grd$svccat <- c( 'gulf' , 'vietnam' , 'other' )[ apply( gam.grd[ , 3:5 ] , 1 , which.max ) ]
# save only that max
gam.grd$statistic <- apply( gam.grd[ , 3:5 ] , 1 , max )
# it's important to note that i've thrown out a lot of information here
gam.grd <- gam.grd[ , c( 'intptlon' , 'intptlat' , 'statistic' , 'svccat' ) ]
# again, do any points not make sense?
summary( gam.grd$statistic )
# another point below zero.
gam.grd$statistic <- pmax( 0 , gam.grd$statistic )
# our complex sample survey-computed statistics rely on categories,
# but the final map only shows the _highest_ disproportionate item
# from each puma. for example,
# puma 300 is slightly disproportionately more gulf veterans
# and it's also near evenly-split between being
# slightly disproportionately less vietnam vets and
# slightly disproportionately less other era vets
# puma 101 is disproportionately more gulf vets too,
# but it has very heavily disproportionately fewer vietnam vets
# and has close to state-average veterans from other eras.
sas
# only the "disproportionately more" share variable gets retained
# in these predictions. all other information gets thrown away.
# this is the nature of mapping categorical variables
# if you are intent on showing a multi-color gradient with all information,
# you can use the rgb() function, but fair warning:
# the values mush together quickly and your map will probably look like ass.
# i tried building color gradients to map multi-dimensional categorical values
# like the multi-category values in
ps
# but on the color gradient, the red/green/blue values on the palette tend to mush together.
# for example, on this plot right here..
plot( 1:5 , rep( 1 , 5 ) , cex = 3 , pch = 16 , col = mapply( rgb , ps[ , 2 ] , ps[ , 3 ] , ps[ , 4 ] ) )
# red is gulf veterans, green is vietnam veterans, blue is other veterans.
# the colors end up just looking drab.
# even when re-scaled..
rsps <- apply( ps[ , 2:4 ] , 2 , rescale )
# ..the points with high relative rates in two categories (because they're the lowest in the third)
# have a lot of mixture (puma 300) and are therefore indecipherable. what is the color brown here?
plot( 1:5 , rep( 1 , 5 ) , cex = 3 , pch = 16 , col = mapply( rgb , rsps[ , 1 ] , rsps[ , 2 ] , rsps[ , 3 ] ) )
# high vietnam era and also high gulf era service.
text( 1:5 , rep( 1.2 , 5 ) , ps[ , 1 ] )
# multi-dimensional categorical variable coloring is a nightmare.
# you have to simplify it.
# simplifying it means throwing out information.
# now where were we?
library(RColorBrewer)
# draw three gradients
tg <-
lapply(
brewer.pal( 3 , 'Set1' ) ,
function( z ) colorRampPalette( c( 'white' , z ) )( 101 )
)
# check out each of these three colors, ramped from white to intense.
plot( rep( 0:100 , 3 ) , rep( c( -1 , 0 , 1 ) , each = 101 ) , col = unlist( tg ) , pch = 16 , cex = 3 )
# draw an alternate three gradients
# that start at ~20% ( that is: 25 / 125 )
# and also use a different palette from colorbrewer2.org
tag <-
lapply(
brewer.pal( 3 , 'Dark2' ) ,
function( z ) colorRampPalette( c( 'white' , z ) )( 125 )[ 25:125 ]
)
# check out each of these three colors, ramped from a light tint to intense.
plot( rep( 0:100 , 3 ) , rep( c( -1 , 0 , 1 ) , each = 101 ) , col = unlist( tag ) , pch = 16 , cex = 3 )
# # rescale both of the interpolated grids
krig.grd$statistic <- krig.grd$statistic * ( 1 / max( krig.grd$statistic ) )
gam.grd$statistic <- gam.grd$statistic * ( 1 / max( gam.grd$statistic ) )
# note that the re-scaling gets done across all categories,
# and not individually within each category.
# add the hex color identifier
# (the gradients are indexed 1 through 101, so lower-bound the index at one
# in case the statistic rounds all the way down to zero)
krig.grd$color.value <-
	ifelse( krig.grd$svccat == 'gulf' , tg[[1]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	ifelse( krig.grd$svccat == 'vietnam' , tg[[2]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	ifelse( krig.grd$svccat == 'other' , tg[[3]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	NA ) ) )
# awwwwwwww yeah, something's happening now.
plot( krig.grd$intptlon , krig.grd$intptlat , col = krig.grd$color.value , pch = 16 , cex = 3 )
# add the alternate hex color identifier
# (same one-based index guard as above)
krig.grd$alt.color <-
	ifelse( krig.grd$svccat == 'gulf' , tag[[1]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	ifelse( krig.grd$svccat == 'vietnam' , tag[[2]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	ifelse( krig.grd$svccat == 'other' , tag[[3]][ pmax( 1 , round( krig.grd$statistic * 100 ) ) ] ,
	NA ) ) )
# that looks a bit better to me
plot( krig.grd$intptlon , krig.grd$intptlat , col = krig.grd$alt.color , pch = 16 , cex = 3 )
# lower-bound the alternate color to remove the white lines
krig.grd$bound.color <-
ifelse( krig.grd$svccat == 'gulf' , tag[[1]][ pmax( 5 , round( krig.grd$statistic * 100 ) ) ] ,
ifelse( krig.grd$svccat == 'vietnam' , tag[[2]][ pmax( 5 , round( krig.grd$statistic * 100) ) ] ,
ifelse( krig.grd$svccat == 'other' , tag[[3]][ pmax( 5 , round( krig.grd$statistic * 100 ) ) ] ,
NA ) ) )
# that's smoothing by hand for you.
plot( krig.grd$intptlon , krig.grd$intptlat , col = krig.grd$bound.color , pch = 16 , cex = 3 )
# put that color band on the `gam.grd` data.frame as well
gam.grd$bound.color <-
ifelse( gam.grd$svccat == 'gulf' , tag[[1]][ pmax( 5 , round( gam.grd$statistic * 100 ) ) ] ,
ifelse( gam.grd$svccat == 'vietnam' , tag[[2]][ pmax( 5 , round( gam.grd$statistic * 100) ) ] ,
ifelse( gam.grd$svccat == 'other' , tag[[3]][ pmax( 5 , round( gam.grd$statistic * 100 ) ) ] ,
NA ) ) )
# # end of step 8 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # #
# # step 9: ggplot and choose options # #
# # # # # # # # # # # # # # # # # # # # #
library(ggplot2)
library(mapproj)
library(scales)
# initiate the krige-based plot
# pass the hex codes through directly: converting them to a factor risks
# having them read as integer palette codes rather than colors, and the
# scale_fill_manual() call did nothing here since no fill aesthetic was mapped
krg.plot <-
	ggplot( data = krig.grd , aes( x = intptlon , y = intptlat ) ) +
	geom_point( shape = 15 , colour = krig.grd$bound.color )
# initiate the gam-based plot
gam.plot <-
	ggplot( data = gam.grd , aes( x = intptlon , y = intptlat ) ) +
	geom_point( shape = 15 , colour = gam.grd$bound.color )
# view both grids!
krg.plot
gam.plot
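# an alternative sketch (`krg.plot.alt` is just an illustrative name):
# map the precomputed hex codes through aes() and scale_colour_identity() ,
# which tells ggplot2 to use the color values verbatim
krg.plot.alt <-
	ggplot( krig.grd , aes( x = intptlon , y = intptlat , colour = bound.color ) ) +
	geom_point( shape = 15 ) +
	scale_colour_identity()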
# initiate the entire plot
the.plot <-
# choose only one of the two interpolation grids
krg.plot +
# gam.plot +
# blank out the legend and axis labels
theme(
legend.position = "none" ,
axis.title.x = element_blank() ,
axis.title.y = element_blank()
) +
xlab( "" ) + ylab( "" ) +
# force the x and y axis limits at the shape of the state and don't do anything special for off-map values
scale_x_continuous( limits = c( -191 , -127 ) , breaks = NULL , oob = squish ) +
# since we're going to add lots of surrounding-area detail!
scale_y_continuous( limits = c( 50 , 73 ) , breaks = NULL , oob = squish ) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
axis.ticks = element_blank()
)
# print the plot to the screen
the.plot
# this is the bottom layer.
# initiate an aleutian islands-focused wrap-around function
s360 <- function( z ){ z[ z$long > 0 , 'long' ] <- z[ z$long > 0 , 'long' ] - 360 ; z }
# # alaskan state borders # #
# convert the alaskan borders to longlat,
# prepare for ggplot2 with `fortify`
# wrap edge points around
ab <- s360( fortify( spTransform( alaska.borders , CRS( "+proj=longlat" ) ) ) )
# store this information in a layer
state.border.layer <- geom_path( data = ab , aes( x = long , y = lat , group = group ) , colour = 'darkgrey' )
# plot the result
the.plot + state.border.layer
# # alaskan main roads # #
# convert the alaskan roads to longlat,
# prepare for ggplot2 with `fortify`
# wrap edge points around
akr <- s360( fortify( spTransform( asf[[3]] , CRS( "+proj=longlat" ) ) ) )
# store this information in a layer
state.roads.layer <- geom_path( data = akr , aes( x = long , y = lat , group=group ) , colour = 'darkred' )
# plot the result
the.plot + state.border.layer + state.roads.layer
# # end of step 9 # #
# # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # #
# # step 10: project, blank, and save # #
# # # # # # # # # # # # # # # # # # # # #
library(ggplot2)
library(scales)
library(raster)
library(plyr)
library(rgeos)
# exclude outer alaska if you hate the wilderness or something
the.plot + state.border.layer + coord_cartesian( xlim = c( -155 , max( x$intptlon ) ) , ylim = c( min( x$intptlat ) , 70 ) )
# distort the map with simple latitude/longitude scaling
the.plot + state.border.layer + coord_fixed( 2.5 )
# this looks crappy, who knows what it is
the.plot + state.border.layer + coord_equal()
# check out a bunch of other options #
the.plot + state.border.layer + coord_map( project = "cylequalarea" , mean( x$intptlat ) )
# here's the one that makes the most sense for alaska
the.plot + state.border.layer + coord_map( project = "conic" , mean( x$intptlat ) , orientation = c( 90 , 0 , -141 ) )
# see ?mapproject and the ?coord_* functions for a zillion alternatives
# store this projection, but not the state border, into `the.plot`
the.plot <- the.plot + coord_map( project = "conic" , mean( x$intptlat ) , orientation = c( 90 , 0 , -141 ) )
# force the difference shapefile's projection
proj4string( ak.shp.diff ) <- "+init=epsg:2163"
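# note: proj4string<- only *declares* the coordinate system the object is
# already in (epsg:2163, courtesy of the daiacbsf import function above);
# unlike spTransform, it does not move any coordinates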
# initiate the outside blanking layer
outside <- s360( fortify( spTransform( ak.shp.diff , CRS( "+proj=longlat" ) ) ) )
# fix islands piecing together
outside2 <- ddply( outside , .( piece ) , function( x ) rbind( x , outside[ 1 , ] ) )
# convert this fortified object to a ggplot layer
outside.layer <- geom_polygon( data = outside2 , aes( x = long , y = lat , group = id ) , fill = 'white' )
# plot this -- the layer doesn't work, does it?
the.plot + outside.layer
# five points need to change so we have a real bounding box.
subset( outside , lat < 45 | lat > 75 | long < -190 | long > -125 )
# move all of them counter-clockwise by hand
outside[ outside$order %in% c( 1 , 5 ) , 'long' ] <- -116.6568
# outside[ outside$order %in% c( 1 , 5 ) , 'lat' ] <- 20
# outside[ outside$order %in% 4 , 'long' ] <- -220
outside[ outside$order %in% 4 , 'lat' ] <- 37.56767
outside[ outside$order %in% 3 , 'long' ] <- -195.4295
# outside[ outside$order %in% 3 , 'lat' ] <- 100
# outside[ outside$order %in% 2 , 'long' ] <- -100
outside[ outside$order %in% 2 , 'lat' ] <- 79.36447
# fix islands piecing together
outside2 <- ddply( outside , .( piece ) , function( x ) rbind( x , outside[ 1 , ] ) )
# convert this fortified object to a ggplot layer
outside.layer <- geom_polygon( data = outside2 , aes( x = long , y = lat , group = id ) , fill = 'white' )
# plot this.
the.plot + outside.layer
# that's not so bad, i guess.
# i don't care for the state border layer,
# but if you want the state border layer,
# use this save line:
final.plot <- the.plot + outside.layer + state.border.layer
# otherwise use this save line:
# final.plot <- the.plot + outside.layer
# you can airbrush the outside blue border
# in microsoft paint or something
# if you want, right? like a boss.
# save the file to your current working directory
ggsave(
"2013 alaskan veteran service eras.png" ,
plot = final.plot ,
scale = 3
)
# happy?
# # end of step 10 # #
# # # # # # # # # # # #