Skip to content

Instantly share code, notes, and snippets.

@jdavidson
Created April 1, 2014 00:35
Show Gist options
  • Save jdavidson/9905430 to your computer and use it in GitHub Desktop.
Save jdavidson/9905430 to your computer and use it in GitHub Desktop.
Analysis of crunchbase acquisition data.
library(ggplot2)
library(ggthemes)
library(plyr)
library(dplyr)
library(lubridate)
library(scales)
library(data.table)
library(reshape2)
options(scipen=999)
options(stringsAsFactors = FALSE)
companies <- read.csv("2014-03-04-crunchbase_monthly_export_companies.csv")
companies <- data.table(companies)
companies$founded_month <- floor_date(ymd(paste(companies$founded_month, "01", sep="-")), "month")
companies$founded_year <- floor_date(companies$founded_month, "year")
categories <- read.csv("categories.csv")
companies <- data.table(inner_join(companies, select(categories, category_code, broad_category)))
companies <- filter(companies, broad_category %in% c("enterprise", "consumer"))
# clean up companies
companies <- filter(companies, country_code == "USA", state_code != "", founded_year > ymd("2002-01-01"))
companies$region <- toupper(gsub(" - Other", "", companies$region))
companies$city <- toupper(gsub("[^[:alnum:]///' ]", "", companies$city))
companies[region == "SF BAY"]$state_code <- "CA"
companies[region == "NEW YORK"]$state_code <- "NY"
companies[region == "LOS ANGELES"]$state_code <- "CA"
companies <- filter(companies, !region %in% c("UNKNOWN", "TBD"))
rounds <- read.csv("2014-03-04-crunchbase_monthly_export_rounds.csv")
# dedup
rounds <- data.table(rounds)
setkeyv(rounds, c("company_name", "funded_at", "funding_round_type"))
rounds <- unique(rounds)
rounds <- subset(rounds, funded_month != "1960-01")
# fix strange date data
rounds$funded_at <- ymd(paste(rounds$funded_month, "01", sep="-"))
# clean up rounds
rounds <- filter(rounds, company_country_code == "USA", company_state_code != "")
categories <- read.csv("categories.csv")
names(categories)[1] <- c("company_category_code")
rounds <- data.table(inner_join(rounds, select(categories, company_category_code, broad_category)))
rounds <- filter(rounds, broad_category %in% c("enterprise", "consumer"))
rounds <- filter(rounds, !is.na(raised_amount_usd))
rounds <- filter(rounds, funding_round_type %in% c("venture", "angel", "series-a", "series-b", "series-c+"))
last_funding_round_type <- rounds %.% arrange(company_name, funded_at) %.% group_by(company_name) %.% summarise(last_funding_round_type=last(funding_round_type))
setnames(last_funding_round_type, "company_name", "name")
companies <- join(companies, last_funding_round_type)
companies <- filter(companies, !is.na(last_funding_round_type))
acquisitions <- read.csv("2014-03-04-crunchbase_monthly_export_acquisitions.csv")
acquisitions <- data.table(acquisitions)
acquisitions$acquired_month <- floor_date(ymd(paste(acquisitions$acquired_month, "01", sep="-")), "month")
acquisitions$acquired_year <- floor_date(acquisitions$acquired_month, "year")
acquisitions <- filter(acquisitions, price_currency_code == "USD" || is.na(price_currency_code))
acquisitions$price_amount <- as.numeric(acquisitions$price_amount)
setnames(acquisitions, "company_name", "name")
companies <- join(companies, select(acquisitions, name, acquirer_name, acquired_at, acquired_month, acquired_year, price_amount, price_currency_code))
year_status <- companies %.% group_by(founded_year=founded_year) %.% summarise(acquired=sum(!is.na(acquired_at)), companies=n())
ggplot(year_status, aes(x=founded_year, y=acquired/companies)) + geom_point()
year_status <- filter(companies, acquired_year > ymd("2005-01-01")) %.% group_by(acquired_year=acquired_year) %.% summarise(companies=n())
acqplot <- ggplot(year_status, aes(x=acquired_year, y=companies)) + geom_point() + ylim(0, max(year_status$companies)) + xlab("Acquisition Year") + ylab("Acquisitions") + ggtitle("Crunchbase Covered Acquisitions")
ggsave("acquisitions.png", acqplot, width=640 / 72, height=400 / 72, dpi=72)
top_categories <- companies %.% group_by(category_code=category_code) %.% summarise(companies=n()) %.% arrange(desc(companies)) %.% head(9)
category_status <- filter(companies, category_code %in% top_categories$category_code) %.% group_by(founded_year=founded_year, category_code=category_code) %.% summarise(acquired=sum(!is.na(acquired_at)), companies=n())
ggplot(category_status, aes(x=founded_year, y=acquired/companies, group=category_code, color=category_code)) + geom_point()
category_status <- filter(companies, category_code %in% top_categories$category_code, founded_year > ymd("2006-01-01"), founded_year < ymd("2011-01-01")) %.% group_by(category_code=category_code) %.% summarise(acquired=sum(!is.na(acquired_at)), companies=n())
catplot <- ggplot(category_status, aes(x=category_code, y=acquired/companies)) + geom_bar(stat="identity") + ylab("Acquisition Rate") + xlab("Category") + ggtitle("Category Acquisition Rates")
ggsave("category_acquisition.png", catplot, width=640 / 72, height=400 / 72, dpi=72)
category_status <- companies %.% group_by(founded_year=founded_year, broad_category=broad_category) %.% summarise(acquired=sum(!is.na(acquired_at)), companies=n())
ggplot(category_status, aes(x=founded_year, y=acquired/companies, group=broad_category, color=broad_category)) + geom_point()
catplot <- ggplot(filter(category_status, founded_year < ymd("2013-01-01")), aes(x=founded_year, y=acquired/companies, fill=broad_category)) + geom_bar(stat="identity", position="dodge") + ylab("Acquisition Rate") + xlab("Founded Year") + ggtitle("Category Acquisition Rates Over Time")
ggsave("category_year_acquisition.png", catplot, width=640 / 72, height=400 / 72, dpi=72)
category_status <- companies %.% group_by(broad_category=broad_category) %.% summarise(acquired=sum(!is.na(acquired_at)), companies=n())
ggplot(category_status, aes(x=broad_category, y=acquired/companies)) + geom_bar(stat="identity")
category_status.pt <- melt(data.frame(category_status), "broad_category")
ggplot(category_status.pt, aes(x=broad_category, y=value, fill=variable)) + geom_bar(stat="identity", position="dodge")
last_funding_round_type_total <- companies %.% group_by(founded_year=founded_year, last_funding_round_type=last_funding_round_type) %.% summarise(companies=n(), acquired=sum(!is.na(acquired_at)), reported=sum(!is.na(price_amount)), price_amount=sum(price_amount, na.rm=T))
ggplot(filter(last_funding_round_type_total, last_funding_round_type %in% c("series-a", "series-b", "series-c+")), aes(x=founded_year, y=acquired/companies, color=last_funding_round_type, group=last_funding_round_type)) + geom_point()
companies <- transform(companies, delta=year(acquired_year) - year(founded_year))
age <- filter(companies, acquired_year > ymd("2005-01-01"), delta >= 0) %.%
group_by(acquired_year=acquired_year, founded_year=founded_year, delta=delta) %.%
summarise(companies=n())
ggplot(age, aes(x=acquired_year, y=companies, fill=as.factor(founded_year))) + geom_bar(stat="identity")
ggplot(age, aes(x=acquired_year, y=companies, fill=as.factor(delta))) + geom_bar(stat="identity")
age_summary <- filter(companies, acquired_year > ymd("2005-01-01"), delta >= 0) %.%
group_by(acquired_year=acquired_year) %.%
summarise(companies=n(), avg_delta=mean(delta), med_delta=as.double(median(delta)))
ageplot <- ggplot(filter(age_summary, acquired_year > ymd("2008-01-01")), aes(x=acquired_year, y=avg_delta)) + geom_point() + geom_smooth(method="lm") + ylab("Average LifeTime (Years)") + xlab("Acquisition Year")
ggsave("avg_acquisition.png", ageplot, width=640 / 72, height=400 / 72, dpi=72)
category_age_summary <- filter(companies, acquired_year > ymd("2005-01-01"), delta >= 0) %.%
group_by(acquired_year=acquired_year, broad_category=broad_category) %.%
summarise(companies=n(), avg_delta=mean(delta), med_delta=as.double(median(delta)))
cageplot <- ggplot(filter(category_age_summary, acquired_year > ymd("2008-01-01")), aes(x=acquired_year, y=avg_delta, color=broad_category)) + geom_point() + geom_smooth(method="lm") + ylab("Average LifeTime (Years)") + xlab("Acquisition Year")
ggsave("avg_category_acquisition_lifetime.png", cageplot, width=640 / 72, height=400 / 72, dpi=72)
avg_delta=mean(delta), med_delta=as.double(median(delta)
price_summary <- filter(companies, acquired_year > ymd("2005-01-01"), !is.na(price_amount)) %.%
group_by(acquired_year=acquired_year) %.%
summarise(companies=n(),
raised=sum(!is.na(funding_total_usd)),
price_amount=sum(price_amount),
funding_total_usd=sum(funding_total_usd, na.rm=T),
avg_price_amount=mean(price_amount),
avg_funding_total_usd=mean(funding_total_usd),
med_price_amount=as.double(median(price_amount)),
med_funding_total_usd=as.double(median(funding_total_usd)))
sub_price_summary <- select(filter(price_summary, acquired_year > ymd("2008-01-01")), acquired_year, avg_price_amount, med_price_amount)
sub_price_summary.pt <- melt(data.frame(sub_price_summary), "acquired_year")
priceplot <- ggplot(sub_price_summary.pt, aes(x=year(acquired_year), y=value / 1e6, color=variable)) + geom_point() + ylab("$ (Millions)") + xlab("Acquisition Year") + scale_y_continuous(labels=dollar_format()) + ggtitle("Avg vs Median Acquisition Price")
ggsave("avg_acquisition.png", priceplot, width=640 / 72, height=400 / 72, dpi=72)
momplot <- ggplot(filter(price_summary, acquired_year > ymd("2008-01-01")), aes(x=year(acquired_year), y=avg_price_amount / avg_funding_total_usd)) + geom_point() + ylab("Cash on Cash Multiple") + xlab("Acquisition Year") + ylim(0, 40) + ggtitle("Cash on Cash Multiples")
ggsave("money_multiple.png", momplot, width=640 / 72, height=400 / 72, dpi=72)
acqplot <- ggplot(filter(price_summary, acquired_year > ymd("2008-01-01")), aes(x=acquired_year, y=companies)) + geom_point() + ylab("Acquisitions") + xlab("Acquisition Year") + ggtitle("Companies with Reported Acquisitions") + ylim(0, max(price_summary$companies))
ggsave("reported_acquisitions.png", acqplot, width=640 / 72, height=400 / 72, dpi=72)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment