Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Analysis of startup category performance from Crunchbase
library(ggplot2)
library(ggthemes)
library(dplyr)
library(lubridate)
library(scales)
library(data.table)
library(reshape2)
options(scipen=999)
options(stringsAsFactors = FALSE)
# Lagged difference between date-times, expressed in 30-day "months".
#
# base::diff() on date-times picks its units automatically, so the scale of the
# result can change with the data. This helper always measures in days, divides
# by 30 and rounds to the nearest whole number.
#
# x:   vector of dates / date-times accepted by difftime().
# lag: distance between the elements being compared (default 1).
#
# Returns a difftime vector of length max(length(x) - lag, 0). Its values are
# months, although the units attribute still reads "days" because only the
# magnitude is rescaled.
my.diff <- function(x, lag = 1) {
  n <- length(x)
  # With n <= lag there is no pair to compare. Without this guard the index
  # ranges (1 + lag):n and 1:(n - lag) would count backwards and yield bogus
  # elements instead of an empty result.
  if (n <= lag) {
    return(as.difftime(numeric(0), units = "days"))
  }
  later <- x[(1 + lag):n]
  earlier <- x[seq_len(n - lag)]
  round(difftime(later, earlier, units = "days") / 30)
}
# ---- Load and prepare the funding rounds ----
# Reads the Crunchbase rounds export from the working directory.
rounds <- data.table(
  read.csv("2014-01-06-crunchbase_monthly_export_rounds.csv")
)

# dedup: keep one row per company / funding date / round type.
# `by` is spelled out because unique() on a data.table only deduplicated by key
# in older releases; newer ones compare every column unless told otherwise.
setkeyv(rounds, c("company_name", "funded_at", "funding_round_type"))
rounds <- unique(rounds, by = key(rounds))

# drop rows tagged with month "1960-01" (presumably a placeholder for an
# unknown date -- TODO confirm against the export)
rounds <- rounds[funded_month != "1960-01"]

# fix strange date data: rebuild funded_at as the first day of funded_month
rounds[, funded_at := ymd(paste(funded_month, "01", sep = "-"))]

# chronological order, so the per-company sequences below follow funding
# history; native ordering also discards the now stale key
rounds <- rounds[order(funded_at)]

# id: 1-based position of each round within its company
rounds[, id := seq_len(.N), by = company_name]

# diff: months until the company's next round, NA on its last known round.
# as.numeric() keeps the column a plain number, and head(..., .N) pins the
# result to the group size so a company with a single round gets exactly one
# NA whatever my.diff() returns for it.
rounds[
  ,
  diff := head(c(as.numeric(my.diff(funded_at)), NA_real_), .N),
  by = company_name
]
# ---- Clean up rounds ----
# Keep US companies that report a state. Subsetting natively yields a fresh
# data.table, so the by-reference updates below are safe.
rounds <- rounds[company_country_code == "USA" & company_state_code != ""]

# normalise place names: strip the " - Other" suffix from regions, drop
# characters other than letters, digits, "/", "'" and spaces from cities,
# and upper-case both
rounds[, company_region := toupper(gsub(" - Other", "", company_region))]
rounds[, company_city := toupper(gsub("[^[:alnum:]///' ]", "", company_city))]

# force a single state code for these three regions; `:=` with a row filter
# updates the matching rows in place instead of copying and reassigning them
rounds[company_region == "SF BAY", company_state_code := "CA"]
rounds[company_region == "NEW YORK", company_state_code := "NY"]
rounds[company_region == "LOS ANGELES", company_state_code := "CA"]

# discard rows whose region is a known non-answer
rounds <- rounds[!company_region %in% c("UNKNOWN", "TBD")]

# space-separated "REGION STATE COUNTRY" label
rounds[
  ,
  geocode := paste(company_region, company_state_code, company_country_code)
]
# ---- Attach broad categories ----
# categories.csv is expected to have the category code in its first column and
# a `broad_category` column.
categories <- read.csv("categories.csv")
# give the first column the name used in `rounds` so it can serve as join key
names(categories)[1] <- "company_category_code"

# Inner join: rounds whose category code is missing from the lookup are
# dropped. Naming the key explicitly avoids depending on whichever columns the
# two tables happen to share (and silences the natural-join message).
rounds <- data.table(
  inner_join(
    rounds,
    select(categories, company_category_code, broad_category),
    by = "company_category_code"
  )
)

# only the enterprise / consumer split is analysed
rounds <- rounds[broad_category %in% c("enterprise", "consumer")]
# Follow-on statistics per broad category and year of a company's first round.
#   companies: companies whose first round (id == 1) falls in that year
#   follow_on: how many of them have a later round on record (diff not NA)
# The date filter is strict, so first rounds dated exactly 2005-01-01 are
# excluded.
# Uses %>% because the original %.% operator has been removed from dplyr.
category_success <- rounds %>%
  filter(id == 1, funded_at > ymd("2005-01-01")) %>%
  group_by(broad_category, year = year(funded_at)) %>%
  summarise(companies = n(), follow_on = sum(!is.na(diff))) %>%
  # summarise() leaves the result grouped by broad_category; drop that so the
  # sort below is global
  ungroup() %>%
  arrange(desc(companies))
# Dodged bar chart of the follow-on rate (follow_on / companies) per year,
# one bar per broad category; years from 2014 onwards are left out.
cplot <- ggplot(
  filter(category_success, year < 2014),
  aes(x = as.factor(year), y = follow_on / companies, fill = broad_category)
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_discrete(name = "Category") +
  labs(
    x = "Year",
    y = "Follow On Rate",
    title = "Follow On Rate by Category"
  ) +
  theme(
    # legend sits inside the panel, near the top-right corner
    legend.position = c(.9, .8),
    legend.key = element_rect(fill = alpha("white", .2)),
    legend.background = element_rect(fill = alpha("white", .9)),
    legend.title = element_blank()
  )

# width/height are in inches: 640 x 400 pixels at 72 dpi
ggsave(
  "category-follow-on.png",
  plot = cplot,
  width = 640 / 72,
  height = 400 / 72,
  dpi = 72
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment