Skip to content

Instantly share code, notes, and snippets.

@jdavidson
Created January 24, 2014 03:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jdavidson/8591371 to your computer and use it in GitHub Desktop.
Save jdavidson/8591371 to your computer and use it in GitHub Desktop.
Geographic visualization of startup fundraising trends
# Packages. library() is used throughout so that a missing package stops the
# script immediately; require() would only return FALSE and let it fail later.
# The attach order is kept as-is because several of these packages mask each
# other's functions.
library(ggmap)
library(animation)
library(ggplot2)
library(ggthemes)
library(dplyr)
library(lubridate)
library(scales)
library(data.table)
library(reshape2)

# Strongly prefer fixed over scientific notation when printing numbers.
options(scipen = 999)
# Read text columns as character rather than factor (only the default since
# R 4.0).
options(stringsAsFactors = FALSE)
# Load the CrunchBase funding-rounds export into a data.table.
rounds <- data.table(
  read.csv(
    "2014-01-06-crunchbase_monthly_export_rounds.csv",
    stringsAsFactors = FALSE
  )
)

# dedup: one row per (company, funding date, round type).
# `by` is given explicitly because unique() on a data.table compares ALL
# columns by default since data.table 1.9.8; without it, rows that share the
# key but differ in any other field would survive.
setkeyv(rounds, c("company_name", "funded_at", "funding_round_type"))
rounds <- unique(rounds, by = key(rounds))

# Discard rounds dated 1960-01 (presumably a placeholder date in the export).
rounds <- rounds[funded_month != "1960-01"]

# fix strange date data: rebuild funded_at as the first day of funded_month
rounds[, funded_at := ymd(paste(funded_month, "01", sep = "-"))]

# clean up rounds: keep US companies that have a state code
rounds <- rounds[company_country_code == "USA" & company_state_code != ""]

# Normalise region and city names: strip the " - Other" suffix from regions,
# strip characters outside the allowed set from cities, upper-case both.
rounds[, company_region := toupper(gsub(" - Other", "", company_region))]
rounds[, company_city := toupper(gsub("[^[:alnum:]///' ]", "", company_city))]

# Pin these three regions to a single state code each, whatever state the
# individual company row carried.
rounds[company_region == "SF BAY", company_state_code := "CA"]
rounds[company_region == "NEW YORK", company_state_code := "NY"]
rounds[company_region == "LOS ANGELES", company_state_code := "CA"]

# Drop rows whose region is a placeholder value.
rounds <- rounds[!company_region %in% c("UNKNOWN", "TBD")]

# String used as the geocoding lookup key: "<REGION> <STATE> <COUNTRY>".
rounds[
  ,
  geocode := paste(company_region, company_state_code, company_country_code)
]
# One-off step, kept for reference: geocode every distinct region string and
# cache the result in regions-geocoded.csv, which is what gets read below.
# regions <- unique(rounds$geocode)
# geocodes <- geocode(regions)
# regions <- data.frame(geocode=regions, geocodes)
# write.csv(regions, "regions-geocoded.csv")
regions <- read.csv("regions-geocoded.csv", stringsAsFactors = FALSE)

# Number of rounds and total dollars raised per region and year.
geocode_summary <- rounds[, list(
  rounds = .N,
  raised_amount_usd = sum(raised_amount_usd, na.rm = TRUE)
), by = c("geocode", "funded_year")]

# Attach coordinates. The join key is named explicitly so the join cannot
# silently pick up any other column the two tables happen to share.
# NOTE(review): assumes the cached CSV has the geocode/lon/lat layout produced
# by the commented-out code above -- confirm against the file.
geocode_summary <- inner_join(geocode_summary, regions, by = "geocode")
# ---- Shared plotting pieces -------------------------------------------------

# Theme used by every frame: legend inset over the lower-right of the map on a
# translucent white panel, and all axis decoration removed.
bubble_theme <- theme(
  legend.position = c(.9, .2),
  legend.key = element_rect(fill = alpha("white", .2)),
  legend.background = element_rect(fill = alpha("white", .9)),
  axis.line = element_blank(),
  axis.text.x = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks = element_blank(),
  axis.title.x = element_blank(),
  axis.title.y = element_blank()
)

# Point mappings: bubble area by dollars raised (in billions) or round count.
raised_aes <- aes(x = lon, y = lat, size = raised_amount_usd / 1e9)
rounds_aes <- aes(x = lon, y = lat, size = rounds)

# Build the bubble map for one year.
#   base_map      map object returned by get_map()
#   summary_data  per-location, per-year summary with lon, lat, funded_year
#   target_year   year to plot; also used as the plot title
#   mapping       aes() for the points (x, y and size)
#   legend_title  title of the size legend
#   size_max      upper limit of the size scale; the lower limit is fixed at 1,
#                 so values below 1 fall outside the scale
#   size_breaks   legend breaks; ggplot2's default when omitted
# Returns the ggplot object; the caller prints or saves it.
year_bubble_map <- function(base_map, summary_data, target_year, mapping,
                            legend_title, size_max, size_breaks = waiver()) {
  ggmap(base_map) +
    bubble_theme +
    ggtitle(paste(target_year)) +
    geom_point(
      data = filter(summary_data, funded_year == target_year),
      mapping = mapping,
      alpha = .6,
      color = "blue"
    ) +
    scale_size_area(
      name = legend_title,
      max_size = 20,
      limits = c(1, size_max),
      breaks = size_breaks
    )
}

# ---- United States ----------------------------------------------------------

us_map <- get_map(
  location = "united states", zoom = 4, scale = 2,
  maptype = "roadmap", color = "bw"
)
years <- seq(2005, 2013, 1)

# Scale limits span all years so bubble sizes are comparable across frames.
us_raised_max <- max(geocode_summary$raised_amount_usd / 1e9)
us_rounds_max <- max(geocode_summary$rounds)

saveGIF({
  for (yr in years) {
    print(year_bubble_map(
      us_map, geocode_summary, yr, raised_aes, "Billions ($)", us_raised_max
    ))
  }
}, interval = 2, movie.name = "raised-year.gif", ani.width = 640, ani.height = 640)

# Still image for 2013. The year is stated explicitly instead of relying on
# whatever value a loop variable was left holding after the GIF loop.
plot <- year_bubble_map(
  us_map, geocode_summary, 2013, raised_aes, "Billions ($)", us_raised_max
)
ggsave("raised-2013.png", plot, width = 640 / 72, height = 640 / 72, dpi = 72)

saveGIF({
  for (yr in years) {
    print(year_bubble_map(
      us_map, geocode_summary, yr, rounds_aes, "Rounds", us_rounds_max
    ))
  }
}, interval = 2, movie.name = "rounds-year.gif", ani.width = 640, ani.height = 640)

### California
# From here on `rounds` and `geocode_summary` are narrowed to California and
# keyed by city instead of region.
rounds <- filter(rounds, company_state_code == "CA")
rounds$geocode <- paste(
  rounds$company_city, rounds$company_state_code, rounds$company_country_code
)
# One-off step, kept for reference: geocode each distinct city string and
# cache the result in ca-cities-geocoded.csv, which is what gets read below.
# cities <- unique(rounds$geocode)
# geocodes <- geocode(cities)
# cities <- data.frame(geocode=cities, geocodes)
# write.csv(cities, "ca-cities-geocoded.csv")
cities <- read.csv("ca-cities-geocoded.csv")
geocode_summary <- rounds[, list(
  rounds = .N,
  raised_amount_usd = sum(raised_amount_usd, na.rm = TRUE)
), by = c("geocode", "funded_year")]
# Explicit key: join on the geocode string only.
geocode_summary <- inner_join(geocode_summary, cities, by = "geocode")

ca_raised_max <- max(geocode_summary$raised_amount_usd / 1e9)
ca_rounds_max <- max(geocode_summary$rounds)

ca_map <- get_map(
  location = "california", zoom = 6, scale = 2,
  maptype = "roadmap", color = "bw"
)

saveGIF({
  for (yr in years) {
    print(year_bubble_map(
      ca_map, geocode_summary, yr, raised_aes, "Billions ($)", ca_raised_max,
      size_breaks = c(2, 4, 6)
    ))
  }
}, interval = 2, movie.name = "ca-raised-year.gif", ani.width = 640, ani.height = 640)

saveGIF({
  for (yr in years) {
    print(year_bubble_map(
      ca_map, geocode_summary, yr, rounds_aes, "Rounds", ca_rounds_max
    ))
  }
}, interval = 2, movie.name = "ca-rounds-year.gif", ani.width = 640, ani.height = 640)

# Same California city data, zoomed in around San Francisco.
sf_map <- get_map(
  location = "san francisco", zoom = 9, scale = 2,
  maptype = "roadmap", color = "bw"
)

saveGIF({
  for (yr in years) {
    print(year_bubble_map(
      sf_map, geocode_summary, yr, raised_aes, "Billions ($)", ca_raised_max,
      size_breaks = c(2, 4, 6)
    ))
  }
}, interval = 2, movie.name = "sf-raised-year.gif", ani.width = 640, ani.height = 640)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment