Skip to content

Instantly share code, notes, and snippets.

@pganti
Created December 22, 2019 20:20
Show Gist options
  • Save pganti/744ef65ca1127d35d6fb4059b6edd294 to your computer and use it in GitHub Desktop.
Save pganti/744ef65ca1127d35d6fb4059b6edd294 to your computer and use it in GitHub Desktop.
crunchbase to network
# Crunchbase to NetworkX network builder
#
# Builds a network from the Crunchbase database and outputs it in graphml format.
#
# Required modules:
# simplejson (http://undefined.org/python/#simplejson)
# networkx (http://networkx.lanl.gov/)
import urllib2, simplejson as json, networkx as nx
def getCBinfo(namespace, permalink):
    """Fetch one record from the Crunchbase v1 API and return it as parsed JSON.

    The request URL is built as
    ``http://api.crunchbase.com/v/1/<namespace>/<permalink>.js``.

    Args:
        namespace: API namespace path segment (for example ``'company'``).
        permalink: Crunchbase permalink identifying the entity.

    Returns:
        The decoded JSON document returned by the API.

    Errors raised while opening the URL or decoding the body are not handled
    here and propagate to the caller.
    """
    api_url = "http://api.crunchbase.com/v/1/%s/%s.js" % (namespace, permalink)
    response = urllib2.urlopen(api_url)
    try:
        return json.loads(response.read())
    finally:
        # This is called once per company; close the connection explicitly
        # instead of leaving it open until garbage collection.
        response.close()
def add_clique(G, investors):
    """Add a group of co-investors to ``G`` as a weighted clique.

    Every investor becomes a node carrying an ``inv_type`` attribute, and every
    pair of distinct investors is connected. A new edge starts with
    ``weight=1``; an edge that already exists has its ``weight`` incremented
    by one.

    Args:
        G: Graph to update in place. It must provide ``add_node``,
            ``add_edge``, ``has_edge`` and ``G[u][v]`` edge-attribute access
            (the NetworkX ``Graph`` interface).
        investors: Sequence of ``(permalink, investor_type)`` pairs.

    Returns:
        The same graph object ``G``. It is returned unchanged when fewer than
        two investors are given, since there is nothing to connect.
    """
    from itertools import combinations

    if len(investors) < 2:
        return G

    # add nodes
    for permalink, inv_type in investors:
        G.add_node(permalink, inv_type=inv_type)

    # Nodes are keyed by permalink alone, so the same permalink listed under
    # two investor types is a single node. Pair up distinct permalinks only;
    # otherwise that node would get a self-loop and its edges to the other
    # investors would be incremented twice for one company.
    seen = set()
    permalinks = []
    for permalink, _ in investors:
        if permalink not in seen:
            seen.add(permalink)
            permalinks.append(permalink)

    # add edges
    for u, v in combinations(permalinks, 2):
        # has_edge / G[u][v] exist in every NetworkX release, unlike the
        # G.edge mapping, and avoid the Python 2-only dict.has_key.
        if G.has_edge(u, v):
            G[u][v]['weight'] += 1
        else:
            G.add_edge(u, v, weight=1)
    return G
# Main.
# Get the list of companies Crunchbase has data on
company_index = urllib2.urlopen("http://api.crunchbase.com/v/1/companies.js")
try:
    company_names = json.loads(company_index.read())
finally:
    # Release the connection before starting the long per-company crawl.
    company_index.close()

# initialize Graph
G = nx.Graph()

# Iterate through companies, getting CB data on each
for company in company_names:
    try:
        co_info = getCBinfo('company', company['permalink'])
    except Exception:
        # Best effort: skip any company whose record cannot be fetched or
        # parsed. Catching Exception rather than using a bare except lets
        # Ctrl-C (KeyboardInterrupt) and SystemExit still stop the crawl.
        continue

    # For each company make a set of all investors, as (permalink, type)
    # pairs. A set is used so an investor taking part in several rounds of
    # the same company is counted once.
    investors = set()
    # .get(...) or [] covers both a missing key and an empty/null value.
    for iround in co_info.get('funding_rounds') or []:
        for investment in iround.get('investments') or []:
            for i_type in ('financial_org', 'person', 'company'):
                investor = investment.get(i_type)
                if investor:
                    investors.add((investor['permalink'], i_type))

    # Add investors and edges between them to the graph
    G = add_clique(G, list(investors))

# Write the network to a graphml file /projects/cb_graph.graphml
# NetworkX supports many other formats as well, check the docs.
nx.write_graphml(G, "/projects/cb_graph.graphml")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment