Skip to content

Instantly share code, notes, and snippets.

@rhhackett
Created December 18, 2013 20:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rhhackett/8029094 to your computer and use it in GitHub Desktop.
Save rhhackett/8029094 to your computer and use it in GitHub Desktop.
Data II Final
# Packages ----
# ggmap supplies qmplot(); it is loaded here explicitly so the script does not
# depend on ticking packages by hand in RStudio's "Packages" pane.
library(ggplot2)
library(ggmap)
library(mapproj)
library(maps)
library(maptools)

# Data ----
# Data-center locations; the plots below read the `lon` and `lat` columns.
dc <- read.csv("bob.csv")

# First look ----
# Bare scatter of the raw coordinates (no basemap) as a sanity check.
plot(dc$lon, dc$lat, pch = ".")

# Same points drawn with qmplot(), which adds a map background.
qmplot(lon, lat, data = dc)
# can't figure out how to get open street maps rather than google maps to work
# qmplot(lon,lat,data=dc,source="osm")
myplot <- qmplot(lon, lat, data = dc)
# need to figure out how to scrape sq footage and power capacity
# size of dot based on space; color based on power
# reduce xlim and ylim? map seems too big
# created new dataset with sqft, number of listed tenants, number of listed carriers
# Per-site figures (sqft, listed tenants, listed carriers) kept in a second
# file. The console prompts ("> ") that had been pasted in front of some of
# these statements are removed so the script parses.
dcnumbers <- read.csv("datacentermap_cleaner.csv")
View(dcnumbers)
# merged new dataset with the one that has long/lat data
# merge() defaults to an inner join: only names present in both tables remain.
dcfull <- merge(dc, dcnumbers, by = "name")
View(dcfull)
# Coordinate corrections ----
# For some reason, some 32 Avenue of the Americas locations appear on Long
# Island, so they are overwritten with the correct Manhattan coordinates
# (those of CoreSite New York).
#
# The pasted console prompt and "[1] ..." output lines were removed so the
# script parses; the values they showed are kept as trailing comments.
#
# NOTE(review): rows and columns are addressed by position. In the recorded
# session column 2 held 40.76937 and column 3 held -73.80322 — presumably
# `lat` and `lon`; confirm against the merged table before re-running on
# different data.
manhattan_lat <- 40.72105
manhattan_lon <- -74.00524

# Row 31: NYC3 TELx New York Data Center
dcfull[31, 2] # was 40.76937
dcfull[31, 2] <- manhattan_lat
dcfull[31, 2] # now 40.72105
View(dcfull)
dcfull[31, 3] # was -73.80322
dcfull[31, 3] <- manhattan_lon
dcfull[31, 3] # now -74.00524
View(dcfull)

# Row 36: same for Tata Communications
dcfull[36, 2] # was 40.76937
dcfull[36, 2] <- manhattan_lat
dcfull[36, 2] # now 40.72105
dcfull[36, 3] # was -73.80322
dcfull[36, 3] <- manhattan_lon
dcfull[36, 3] # now -74.00524
View(dcfull)

# Have to remove the outlier data point, New York Internet DC1, which appears
# in upstate NY: it should be at 100 Williams St, NY, NY.
# This must stay AFTER the edits above: dropping row 30 shifts the position of
# every later row, so rows 31 and 36 would no longer be the intended sites.
dcfull <- dcfull[-30, ]
View(dcfull)
# Maps of the corrected data ----
# Baseline replot (Google Maps background) after the coordinate fixes.
qmplot(lon, lat, data = dcfull)

# Colour by number of listed tenants; stored in `myplot`, not drawn here.
myplot <- qmplot(lon, lat, data = dcfull, color = listedtenants)

# Tenants -> colour, sqft -> point size, with the `darken` setting applied to
# the basemap. Gradient runs red (low) to blue (high).
nycmap <- qmplot(
  lon, lat,
  data = dcfull,
  color = listedtenants,
  size = sqft,
  darken = c(.3, "white")
)
nycmap + scale_colour_gradient(low = "red", high = "blue")

# Carriers -> colour (blue = fewer, red = more), sqft -> point size.
nycmap <- qmplot(
  lon, lat,
  data = dcfull,
  color = listedcarriers,
  size = sqft,
  darken = c(.4, "white")
)
nycmap + scale_colour_gradient(low = "blue", high = "red")

# The literal string "na" in sqft shows up as the biggest blots, so turn
# those entries into real missing values.
dcfull$sqft[dcfull$sqft == "na"] <- NA

# Replot the carrier map with the missing values marked as NA.
nycmap <- qmplot(
  lon, lat,
  data = dcfull,
  color = listedcarriers,
  size = sqft,
  darken = c(.4, "white")
)
nycmap + scale_colour_gradient(low = "blue", high = "red")
# Numeric sqft ----
# Whoops: marking sqft as NA removes the 34 data points that had NA from the
# map. The first idea was to plot them as if they had 1000 sqft; the code
# below uses 0 as the placeholder instead.
# ...troubleshooting with mark...

# Work on a copy so `dcfull` keeps its NA markers. `dcfull_working` was never
# created anywhere in this script, so it is derived from `dcfull` here.
dcfull_working <- dcfull

# sqft has to be turned from a factor into something numeric; going through
# as.character() converts the labels rather than the factor's integer codes.
dcfull_working$sqft <- as.numeric(as.character(dcfull_working$sqft))

# Replace the missing values with 0, otherwise the map will exclude these
# 34 points.
dcfull_working$sqft[is.na(dcfull_working$sqft)] <- 0
View(dcfull_working)
# running into an issue...
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment