Created
December 18, 2013 20:11
-
-
Save rhhackett/8029094 to your computer and use it in GitHub Desktop.
Data II Final
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages ----
# ggmap provides qmplot(); it is loaded explicitly here so the script does not
# depend on ticking packages in the RStudio "Packages" pane.
library(ggplot2)
library(ggmap)
library(mapproj)
library(maps)
library(maptools)

# Data center locations; the code below reads the `lon` and `lat` columns.
dc <- read.csv("bob.csv")

# Quick base-graphics scatter of the raw coordinates.
plot(dc$lon, dc$lat, pch = ".")

# Same points drawn over a map background.
qmplot(lon, lat, data = dc)
# can't figure out how to get open street maps rather than google maps to work
# qmplot(lon, lat, data = dc, source = "osm")
myplot <- qmplot(lon, lat, data = dc)
# need to figure out how to scrape sq footage and power capacity | |
# size of dot based on space; color based on power | |
# reduce xlim and ylim? map seems too big
# created new dataset with sqft, number of listed tenants, number of listed carriers | |
# Second dataset, joined to the location data below.
dcnumbers <- read.csv("datacentermap_cleaner.csv")
View(dcnumbers)

# Merge the new dataset with the one that has long/lat data, matching rows on
# the shared `name` column. merge() keeps only names present in both inputs.
dcfull <- merge(dc, dcnumbers, by = "name")
View(dcfull)
# For some reason, some 32 Avenue of the Americas locations appear on Long
# Island, so overwrite their coordinates with the correct Manhattan ones
# (the values of the CoreSite New York row).
# Column 2 holds the ~40.7 values (latitude) and column 3 the ~-74 values
# (longitude).
# NOTE(review): rows and columns are addressed by position, so these fixes
# depend on the row order produced by merge() -- confirm if the CSVs change.
manhattan_lat <- 40.72105
manhattan_lon <- -74.00524

# Row 31 (presumably NYC3 TELx New York Data Center -- verify against the data).
dcfull[31, 2] # was 40.76937
dcfull[31, 2] <- manhattan_lat
dcfull[31, 2] # now 40.72105
View(dcfull)
dcfull[31, 3] # was -73.80322
dcfull[31, 3] <- manhattan_lon
dcfull[31, 3] # now -74.00524
View(dcfull)

# Same for Tata Communications (row 36).
dcfull[36, 2] # was 40.76937
dcfull[36, 2] <- manhattan_lat
dcfull[36, 2] # now 40.72105
dcfull[36, 3] # was -73.80322
dcfull[36, 3] <- manhattan_lon
dcfull[36, 3] # now -74.00524
View(dcfull)

# Remove the outlier data point, New York Internet DC1, which appears in
# upstate NY: it should be at 100 Williams St, NY, NY.
# This must run after the fixes above, because dropping row 30 shifts the
# positions of every later row.
dcfull <- dcfull[-30, ]
View(dcfull)
# Replot the corrected data over the map background.
qmplot(lon, lat, data = dcfull)

# Create myplot using number of tenants as the color option.
myplot <- qmplot(lon, lat, data = dcfull, color = listedtenants)

# Fill in the colors: color by listed tenants, size by sqft.
nycmap <- qmplot(lon, lat, data = dcfull, color = listedtenants, size = sqft,
                 darken = c(.3, "white"))
nycmap + scale_colour_gradient(low = "red", high = "blue")

# Map points where color indicates number of listed carriers
# (blue = less, red = more), and where size indicates sqft.
nycmap <- qmplot(lon, lat, data = dcfull, color = listedcarriers, size = sqft,
                 darken = c(.4, "white"))
nycmap + scale_colour_gradient(low = "blue", high = "red")

# "na" values are showing up as the biggest blots, so turn the literal string
# "na" into a real missing value. %in% never yields NA, so entries that are
# already missing cannot end up in the logical index.
dcfull$sqft[dcfull$sqft %in% "na"] <- NA

# Replot with the missing values marked as NA.
nycmap <- qmplot(lon, lat, data = dcfull, color = listedcarriers, size = sqft,
                 darken = c(.4, "white"))
nycmap + scale_colour_gradient(low = "blue", high = "red")
# Whoops: marking sqft as NA removes the 34 data points that had NA from the
# map. The first idea was to plot them as if they had 1000 sqft; the code
# below fills them with 0 instead.
# ...troubleshooting with mark...have to turn sqft from a factor into
# something numeric.

# The working copy is never created earlier in this script, so build it from
# dcfull unless one already exists in the session.
if (!exists("dcfull_working")) {
  dcfull_working <- dcfull
}

# Go through as.character() first so the numbers are parsed from the labels
# rather than taken from the factor's internal integer codes.
dcfull_working$sqft <- as.numeric(as.character(dcfull_working$sqft))

# Replace NA missing values with 0, otherwise the map will exclude these
# 34 points.
dcfull_working$sqft[is.na(dcfull_working$sqft)] <- 0
View(dcfull_working)
# running into an issue...
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment