Skip to content

Instantly share code, notes, and snippets.

@jdavidson
Created February 9, 2014 23:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jdavidson/8907888 to your computer and use it in GitHub Desktop.
Save jdavidson/8907888 to your computer and use it in GitHub Desktop.
Analysis of unicorns and crunchbase company status by founding year
library(ggplot2)
library(ggthemes)
library(dplyr)
library(lubridate)
library(scales)
library(data.table)
library(reshape2)
# Session settings: avoid scientific notation when printing numbers and keep
# character columns as character when reading CSV files.
options(scipen = 999, stringsAsFactors = FALSE)

# Load the CrunchBase company export as a data.table.
companies <- data.table(
  read.csv("2014-01-06-crunchbase_monthly_export_companies.csv")
)

# Turn the founding-month text into a date by appending a day-of-month of
# "01", then derive the founding year from that date.
companies$founded_month <- floor_date(
  ymd(paste(companies$founded_month, "01", sep = "-")),
  "month"
)
companies$founded_year <- year(companies$founded_month)

# Keep US companies founded from 2003 through 2012.
companies <- filter(
  companies,
  founded_year > 2002,
  founded_year < 2013,
  country_code == "USA"
)

# Attach the broad category for each company's category code. The first
# column of categories.csv is renamed so it can serve as the join column.
categories <- read.csv("categories.csv")
names(categories)[1] <- "category_code"
companies <- data.table(
  inner_join(
    companies,
    select(categories, category_code, broad_category)
  )
)

# Restrict the analysis to the enterprise and consumer broad categories.
companies <- filter(
  companies,
  broad_category %in% c("enterprise", "consumer")
)
# Unicorn rate by founding year.
#
# Counts the filtered companies per founding year, joins the per-year unicorn
# counts read from "unicorn-year.csv", computes unicorns / total_companies,
# and saves a bar chart of that fraction to "unicorn-percent.png".
#
# The grouping column keeps its original name, `founded_year`, because the
# plot below maps `founded_year` to the x axis; grouping as
# `year = founded_year` renames the column in current dplyr and would leave
# `founded_year` missing from the plot data.
# `%>%` is used because dplyr deprecated `%.%` in 0.2 and later removed it.
year_totals <- companies %>%
  group_by(founded_year) %>%
  summarise(total_companies = n())

# NOTE(review): the join relies on the columns shared by both tables, so
# "unicorn-year.csv" is assumed to provide `founded_year` alongside the
# `unicorns` count used below -- confirm against the CSV.
unicorn_year <- read.csv("unicorn-year.csv")
year_totals <- inner_join(as.data.frame(year_totals), unicorn_year)
year_totals <- transform(
  year_totals,
  unicorn_percent = unicorns / total_companies
)

yplot <- ggplot(
  year_totals,
  aes(x = as.factor(founded_year), y = unicorn_percent)
) +
  geom_bar(stat = "identity") +
  ylab("Unicorn Percentage") +
  xlab("") +
  ggtitle("Fraction of Unicorns (>$1B valuation) out of US Venture Backed Tech Startups") +
  scale_y_continuous(labels = percent_format())

# 640 x 400 pixels at 72 dpi.
ggsave("unicorn-percent.png", yplot, width = 640 / 72, height = 400 / 72, dpi = 72)
# Exit and closure rates by founding year.
#
# Counts the filtered companies per founding year and status, joins the
# per-year totals held in `year_totals`, and saves two bar charts of
# companies / total_companies: one for exits (status "acquired" or "ipo") and
# one for status "closed".
#
# The grouping column keeps its original name, `founded_year`, because both
# plots map `founded_year` to the x axis; grouping as `year = founded_year`
# renames the column in current dplyr and would leave `founded_year` missing
# from the plot data.
# `%>%` is used because dplyr deprecated `%.%` in 0.2 and later removed it.
year_status <- companies %>%
  group_by(founded_year, status) %>%
  summarise(companies = n())

# The join relies on the columns shared by both tables.
# NOTE(review): assumes `year_totals` carries a `founded_year` column -- confirm.
year_status <- inner_join(as.data.frame(year_status), year_totals)

# With stat = "identity" the "acquired" and "ipo" rows for a year are stacked,
# so each bar's height is the combined exit fraction.
eplot <- ggplot(
  filter(year_status, status %in% c("acquired", "ipo")),
  aes(x = as.factor(founded_year), y = companies / total_companies)
) +
  geom_bar(stat = "identity") +
  ylab("Exit Percentage") +
  xlab("") +
  ggtitle("US Venture Backed Tech Startups Exit Percent") +
  scale_y_continuous(labels = percent_format())
ggsave("exit-percent.png", eplot, width = 640 / 72, height = 400 / 72, dpi = 72)

cplot <- ggplot(
  filter(year_status, status == "closed"),
  aes(x = as.factor(founded_year), y = companies / total_companies)
) +
  geom_bar(stat = "identity") +
  ylab("Closed Percentage") +
  xlab("") +
  ggtitle("US Venture Backed Tech Startups Closed Percent") +
  scale_y_continuous(labels = percent_format())
ggsave("closed-percent.png", cplot, width = 640 / 72, height = 400 / 72, dpi = 72)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment