Skip to content

Instantly share code, notes, and snippets.

@SnehalGawas
Created January 23, 2018 22:39
Show Gist options
  • Save SnehalGawas/e2e078dc53f755d2d79878e5d60ad81b to your computer and use it in GitHub Desktop.
Save SnehalGawas/e2e078dc53f755d2d79878e5d60ad81b to your computer and use it in GitHub Desktop.
#Import required Libraries
library(ggmap)
library(zipcode)
library(rgeos)
library(sp)
library(maptools)
#Read Data
DF<-data.table::fread('/home/rstudio/mean houshold income.csv')
head(DF)
# Clean Data
names(DF) <- c('Zip','Median_income','Population')
DF$Zip<-as.factor(DF$Zip)
DF$Median_income <- gsub(pattern = ',', replacement = '',x = DF$Median_income)
DF$Median_income <- as.numeric(DF$Median_income)
DF$Population <- gsub(pattern = ',', replacement = '',x = DF$Population)
DF$Population <- as.numeric(DF$Population)
head(DF)
# Extract longitude and latitude from available zipcodes
data(zipcode)
DF=merge(DF,zipcode,by.x='Zip',by.y='zip')
head(DF)
# Plot data on USA map
map<- get_map(location = 'united_states',zoom=4,maptype = 'terrain',source = 'google',color = 'color')
ggmap(map) +
geom_point(aes(x=longitude,y=latitude
,color = Median_income)
,data=DF
,na.rm = T
,size = .5
) +scale_color_gradient(low="coral", high="blue")
#Get highest and lowest income points
x_max=DF[DF$Median_income==max(DF$Median_income),longitude]
y_max=DF[DF$Median_income==max(DF$Median_income),latitude]
x_min=DF[DF$Median_income==min(DF$Median_income),longitude]
y_min=DF[DF$Median_income==min(DF$Median_income),latitude]
#Read shape file
crswgs84=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
states_shape <- readShapePoly("/home/rstudio/states_21basic/states",proj4string=crswgs84,verbose=TRUE)
plot(states_shape,border="wheat3", col="wheat1")
# take subset of East-North-Central region
East_North_Central<-subset(states_shape,STATE_NAME=="Illinois" | STATE_NAME=="Indiana"|STATE_NAME=='Michigan'|STATE_NAME=='Ohio'|STATE_NAME=='Wisconsin')
plot(East_North_Central,border="wheat3", col="wheat1")
text(East_North_Central,East_North_Central$STATE_ABBR,cex=0.8)
# plot high and low income points
plot(states_shape,border="wheat3", col="wheat1")
text(states_shape,states_shape$STATE_ABBR,cex=0.4)
points(x_max,y_max,col='red',pch=16)
points(x_min,y_min,col='green',pch=16)
# Convert Latitude and longitude into SpatialPoints objects
co_max = cbind(x_max,y_max)
co_min= cbind(x_min,y_min)
pt_max_income = SpatialPoints(co_max,proj4string = crswgs84)
pt_min_income = SpatialPoints(co_min,proj4string = crswgs84)
res_high <- colSums(gContains(states_shape, pt_max_income, byid = TRUE))
HIgh_median_Income=setNames(res_high, states_shape@data$STATE_NAME)
HIgh_median_Income[HIgh_median_Income>0]
res_low <- colSums(gContains(states_shape, pt_min_income, byid = TRUE))
Low_median_Income=setNames(res_low, states_shape@data$STATE_NAME)
Low_median_Income[Low_median_Income>0]
gDistance(pt_max_income,pt_min_income)
# divide data into high and low groups
High_income=DF[(DF$Median_income>summary(DF$Median_income)[5])]
low_income=DF[(DF$Median_income<summary(DF$Median_income)[2])]
#convert dataframe to SpatialPointsDataFrame
xy_high <- High_income[,c(6,7)]
spdf_high <- SpatialPointsDataFrame(coords = xy_high, data = High_income,proj4string = crswgs84)
xy_low <- low_income[,c(6,7)]
spdf_low <- SpatialPointsDataFrame(coords = xy_low, data = low_income,proj4string = crswgs84)
high_poly=gConvexHull(spdf_high)
low_poly=gConvexHull(spdf_low)
gIsValid(high_poly,reason=TRUE)
gIsValid(low_poly,reason=TRUE)
plot(high_poly,border='red')
plot(gIntersection(low_poly,high_poly),add=TRUE,col='beige')
plot(gCentroid(high_poly),col='red',add=TRUE)
plot(gCentroid(low_poly),col='blue',add=TRUE)
gArea(high_poly)
gLength(high_poly)
gBoundary(high_poly)
#Read counties and centroids shape file
counties <- readShapePoly("counties")
centroids <- readShapePoints("centroids")
plot(counties, border="wheat3", col="wheat1")
points(centroids,col='red',pch=16,cex=0.3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment