Create a gist now

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Analysis of startup category performance from Crunchbase
library(ggplot2)
library(ggthemes)
library(dplyr)
library(lubridate)
library(scales)
library(data.table)
library(reshape2)
options(scipen=999)
options(stringsAsFactors = FALSE)
# Month gaps between observations that are `lag` positions apart.
#
# base::diff() on date-times lets difftime choose its own units (secs, hours,
# days, ...) from the size of the gaps, so results are not comparable between
# calls. Here the units are pinned to days and then converted to approximate
# months (30-day blocks, rounded).
#
# Args:
#   x:   vector of dates / date-times accepted by difftime().
#   lag: non-negative offset between the elements being compared.
#
# Returns:
#   A difftime of length max(length(x) - lag, 0). The values are month counts;
#   the "days" unit label is carried over from difftime() and is nominal only.
my.diff <- function(x, lag = 1) {
  stopifnot(length(lag) == 1L, !is.na(lag), lag >= 0)
  n <- length(x)
  # Number of (later, earlier) pairs; zero when x is too short. Indexing with
  # (1 + lag):n counts backwards in that case and yields bogus values rather
  # than an empty result.
  n_pairs <- max(n - lag, 0L)
  earlier <- seq_len(n_pairs)
  later <- earlier + lag
  round(difftime(x[later], x[earlier], units = "days") / 30)
}
# ---- Load and de-duplicate funding rounds ----
rounds <- data.table(
  read.csv("2014-01-06-crunchbase_monthly_export_rounds.csv")
)

# A round is identified by company, date and round type. The columns are
# passed to unique() explicitly: older data.table releases de-duplicated a
# keyed table on its key, newer ones compare every column unless `by` is
# given, which would let near-duplicate rows through.
round_key <- c("company_name", "funded_at", "funding_round_type")
setkeyv(rounds, round_key)
rounds <- unique(rounds, by = round_key)

# Drop rounds stamped 1960-01 (presumably a placeholder for a missing date).
rounds <- rounds[funded_month != "1960-01"]

# The raw funded_at values are odd, so rebuild the column as the first day of
# funded_month.
rounds[, funded_at := ymd(paste(funded_month, "01", sep = "-"))]

# Chronological order, so per-company sequence numbers and gaps are
# meaningful. order() keeps the existing order for ties.
rounds <- rounds[order(funded_at)]

# id:   1-based position of the round within its company (1 = earliest).
# diff: months until the same company's next round; NA for its last round.
#       Stored as plain numeric, and trimmed to the group size so that a
#       company with a single round always gets exactly one NA.
rounds[, id := seq_len(.N), by = company_name]
rounds[
  ,
  diff := c(as.numeric(my.diff(funded_at)), NA_real_)[seq_len(.N)],
  by = company_name
]
# ---- Restrict to US companies and normalise location fields ----
# Keep only rounds for US companies that have a state recorded.
rounds <- rounds[company_country_code == "USA" & company_state_code != ""]

# Upper-case region names without the " - Other" suffix; upper-case city
# names with everything except alphanumerics, "/", "'" and spaces removed.
rounds[, company_region := toupper(gsub(" - Other", "", company_region))]
rounds[
  ,
  company_city := toupper(gsub("[^[:alnum:]///' ]", "", company_city))
]

# Force a single state code for these metro regions, overriding whatever the
# export recorded. `:=` updates the matching rows in place and is a no-op
# when nothing matches.
rounds[company_region == "SF BAY", company_state_code := "CA"]
rounds[company_region == "NEW YORK", company_state_code := "NY"]
rounds[company_region == "LOS ANGELES", company_state_code := "CA"]

# Discard rows whose region is not usable.
rounds <- rounds[!(company_region %in% c("UNKNOWN", "TBD"))]

# Space-separated "REGION STATE COUNTRY" label.
rounds[
  ,
  geocode := paste(company_region, company_state_code, company_country_code)
]
# ---- Attach broad category labels ----
# categories.csv supplies a broad_category per category code. Its first
# column is renamed so that it lines up with the code column in `rounds`
# (presumably it holds the same codes under a different header).
categories <- read.csv("categories.csv")
colnames(categories)[1] <- "company_category_code"

# Natural inner join on the shared column(s): rounds whose category code has
# no match in the lookup are dropped. The result is re-wrapped as a
# data.table.
rounds <- data.table(
  inner_join(
    rounds,
    select(categories, company_category_code, broad_category)
  )
)

# Only the enterprise and consumer categories are analysed further.
rounds <- filter(
  rounds,
  is.element(broad_category, c("enterprise", "consumer"))
)
# ---- Follow-on rate by broad category and year ----
# Take each company's first recorded round (id == 1) dated after 2005-01-01
# and count, per broad category and funding year, how many companies there
# are and how many of them have a later round (non-missing `diff`).
# Chained with %>%; the old %.% operator is no longer provided by dplyr.
category_success <- rounds %>%
  filter(id == 1, funded_at > ymd("2005-01-01")) %>%
  group_by(broad_category, year = year(funded_at)) %>%
  summarise(companies = n(), follow_on = sum(!is.na(diff))) %>%
  arrange(desc(companies))

# Dodged bars of follow_on / companies per year, one bar per category.
# Years from 2014 on are left out (presumably because the export only covers
# the first days of 2014).
cplot <- ggplot(
  filter(category_success, year < 2014),
  aes(x = as.factor(year), y = follow_on / companies, fill = broad_category)
) +
  geom_bar(stat = "identity", position = "dodge") +
  ylab("Follow On Rate") +
  xlab("Year") +
  ggtitle("Follow On Rate by Category") +
  scale_fill_discrete(name = "Category") +
  theme(
    # Legend drawn inside the panel on a mostly opaque white background.
    legend.position = c(.9, .8),
    legend.key = element_rect(fill = alpha("white", .2)),
    legend.background = element_rect(fill = alpha("white", .9)),
    legend.title = element_blank()
  )

# 640 x 400 pixels at 72 dpi.
ggsave(
  "category-follow-on.png",
  cplot,
  width = 640 / 72,
  height = 400 / 72,
  dpi = 72
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment