Skip to content

Instantly share code, notes, and snippets.

@jdavidson
Created February 24, 2014 16:11
Show Gist options
  • Save jdavidson/9191278 to your computer and use it in GitHub Desktop.
Save jdavidson/9191278 to your computer and use it in GitHub Desktop.
Analysis of follow on rates by investor class
library(ggplot2)
library(ggthemes)
library(plyr)
library(dplyr)
library(lubridate)
library(scales)
library(data.table)
library(reshape2)
options(scipen=999)
options(stringsAsFactors = FALSE)
# fix strange difference in units from diff
my.diff <- function(x, lag=1) {
n <- length(x)
round(difftime(x[(1+lag):n], x[1:(n-lag)], units="days") / 30)
}
rounds <- read.csv("2014-01-06-crunchbase_monthly_export_rounds.csv")
# dedup
rounds <- data.table(rounds)
setkeyv(rounds, c("company_name", "funded_at", "funding_round_type"))
rounds <- unique(rounds)
rounds <- subset(rounds, funded_month != "1960-01")
# fix strange date data
rounds$funded_at <- ymd(paste(rounds$funded_month, "01", sep="-"))
rounds <- arrange(rounds, funded_at)
rounds <- rounds[, id := seq_along(funded_at), by=company_name]
rounds <- rounds[, diff := c(my.diff(funded_at), NA), by=company_name]
# clean up rounds
rounds <- filter(rounds, company_country_code == "USA", company_state_code != "")
rounds <- filter(rounds, !is.na(raised_amount_usd))
rounds$round_raised_amount_usd <- cut(rounds$raised_amount_usd, breaks=c(0, 1000000, 5000000, 10000000, Inf), right=FALSE)
rounds$round_raised_amount_usd <- mapvalues(rounds $round_raised_amount_usd, from = c("[0,1e+06)", "[1e+06,5e+06)", "[5e+06,1e+07)", "[1e+07,Inf)"), to = c("$0-1M", "$1-5M", "$5-10M", "$10+"))
categories <- read.csv("categories.csv")
names(categories)[1] <- c("company_category_code")
rounds <- data.table(inner_join(rounds, select(categories, company_category_code, broad_category)))
rounds <- filter(rounds, broad_category %in% c("enterprise", "consumer"))
#
investments <- read.csv("2014-01-06-crunchbase_monthly_export_investments.csv")
investments <- subset(investments, funded_month != "1960-01")
investments$funded_at <- ymd(paste(investments$funded_month, "01", sep="-"))
investments <- data.table(investments)
setkeyv(investments, c("company_name", "funded_at", "funding_round_type", "investor_name"))
investments <- unique(investments)
investments$funded_month <- floor_date(ymd(paste(investments $funded_month, "01", sep="-")), "month")
investments$funded_year <- floor_date(investments $funded_month, "year")
investments$investor_type <- unlist(lapply(strsplit(investments$investor_permalink, "/"), function(x) {x[2]}))
investments$investor_type <- as.factor(sub("-organization", "", investments$investor_type))
investments <- filter(investments, funding_round_type %in% c("venture", "angel", "crowdfunding", "series-a", "series-b", "series-c+"))
investors <- filter(investments, funded_at > ymd("2008-01-01"), investor_type == "financial") %.% group_by(investor_name) %.% summarise(total_rounds=n())
investors_rounds <- filter(investments, funded_at > ymd("2008-01-01"), investor_type == "financial") %.% group_by(investor_name, funding_round_type) %.% summarise(rounds=n())
investors_rounds <- join(investors_rounds, investors)
investors_rounds <- transform(investors_rounds, perc_rounds = rounds / total_rounds)
seed_funds <- as.data.frame(filter(investors_rounds, total_rounds > 20, funding_round_type == "angel", perc_rounds > .2)) %.% arrange(desc(perc_rounds)) %.% head(100)
top_institutional_investors <- as.data.frame(filter(investments, funded_at > ymd("2008-01-01"), investor_type == "financial", !(investor_name %in% seed_funds$investor_name))) %.% group_by(investor_name) %.% summarise(total_rounds=n()) %.% arrange(desc(total_rounds)) %.% head(100)
investments <- join(investments, select(rounds, company_name,funded_at,funding_round_type,diff))
seed_fund_success <- filter(investments, funding_round_type == "angel", funded_at > ymd("2008-01-01"), investor_type == "financial") %.% group_by(seed_fund = investor_name %in% seed_funds$investor_name, year=year(funded_at)) %.% summarise(rounds=n(), follow_on=sum(!is.na(diff))) %.% transform(follow_on_rate = follow_on / rounds)
ggplot(filter(seed_fund_success, year < 2014), aes(x=as.factor(year), y= follow_on_rate, fill=seed_fund)) + geom_bar(stat="identity", position="dodge") + scale_y_continuous(labels = percent_format())
top_fund_success <- filter(investments, funding_round_type == "angel", funded_at > ymd("2006-01-01"), investor_type == "financial") %.% group_by(top_fund = investor_name %in% top_institutional_investors$investor_name, year=year(funded_at)) %.% summarise(rounds=n(), follow_on=sum(!is.na(diff))) %.% transform(follow_on_rate = follow_on / rounds)
ggplot(filter(top_fund_success, year < 2014), aes(x=as.factor(year), y= follow_on_rate, fill= top_fund)) + geom_bar(stat="identity", position="dodge") + scale_y_continuous(labels = percent_format())
investments <- transform(investments, fund_class = ifelse(investor_name %in% top_institutional_investors$investor_name, "top_fund", ifelse(investor_name %in% seed_funds$investor_name, "seed_fund", as.character(investor_type))))
success <- filter(investments, funding_round_type == "angel", funded_at > ymd("2008-01-01"), investor_type %in% c("financial", "person")) %.% group_by(fund_class, year=year(funded_at)) %.% summarise(rounds=n(), follow_on=sum(!is.na(diff))) %.% transform(follow_on_rate = follow_on / rounds)
aplot <- ggplot(filter(success, year < 2013), aes(x=as.factor(year), y= follow_on_rate, fill= fund_class)) + geom_bar(stat="identity", position="dodge") + scale_y_continuous(labels = percent_format()) + ggtitle("Angel Follow On Rate") + xlab("") + ylab("Follow On Rate")
ggsave("angel-follow-on.png", aplot, width=640 / 72, height=400 / 72, dpi=72)
investor_type_success <- filter(investments, funding_round_type == "angel", funded_at > ymd("2008-01-01")) %.% group_by(investor_type, year=year(funded_at)) %.% summarise(rounds=n(), follow_on=sum(!is.na(diff))) %.% transform(follow_on_rate = follow_on / rounds)
tplot <- ggplot(filter(investor_type_success, year < 2014), aes(x=as.factor(year), y= follow_on_rate, fill= investor_type)) + geom_bar(stat="identity", position="dodge") + scale_y_continuous(labels = percent_format()) + ggtitle("Angel Follow On Rate") + xlab("") + ylab("Follow On Rate")
ggsave("angel-follow-on-type.png", tplot, width=640 / 72, height=400 / 72, dpi=72)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment