Skip to content

Instantly share code, notes, and snippets.

@marcoi
Forked from bootandy/crunchbase_loader.py
Created December 4, 2012 11:47
Show Gist options
  • Save marcoi/4202981 to your computer and use it in GitHub Desktop.
Save marcoi/4202981 to your computer and use it in GitHub Desktop.
How to load data from Crunchbase (Python)
import requests
import pymongo
# URL prefix of the Crunchbase company endpoint; the company name is appended to it.
CRUNCHBASE_API = 'http://api.crunchbase.com/v/1/company/'
# URL suffix placed after the company name.
# NOTE(review): the api_key value looks like a placeholder -- presumably a real
# key has to be substituted here before the script can work; confirm.
CRUNCHBASE_API_END = '.js?api_key=CRUNCHBASE_API'
# Module-level MongoDB connection to localhost:27017, created at import time.
# NOTE(review): pymongo.Connection was removed in PyMongo 3.0 in favour of
# MongoClient -- confirm which PyMongo version this script is run against.
db_connection = pymongo.Connection('localhost', 27017)
# Handle to the 'tc' database used by the functions in this file.
db = db_connection['tc']
def main():
    """Script entry point: run the company import."""
    load_companies()
def load_companies():
    """Query Crunchbase for stored companies and save what comes back.

    Reads at most 1000 documents from ``db.companies``. Each document's
    'permalink' field (the Crunchbase unique name) is passed to
    ``query_for_company`` and the response is handed to ``process_results``.
    """
    cursor = db.companies.find().limit(1000)
    for company_doc in cursor:
        permalink = company_doc['permalink']
        crunchbase_data = query_for_company(permalink)
        process_results(crunchbase_data)
def query_for_company(company):
    """Fetch the Crunchbase record for a single company.

    :param company: Crunchbase permalink (unique name) of the company; it is
        inserted between ``CRUNCHBASE_API`` and ``CRUNCHBASE_API_END``.
    :returns: the decoded JSON body of the response, or ``None`` when the
        body is not valid JSON.
    """
    url = CRUNCHBASE_API + company + CRUNCHBASE_API_END
    # A single parenthesised argument prints identically on Python 2 and 3.
    print('getting: ' + url)
    r = requests.get(url)
    data = r.json
    # requests < 1.0 exposed ``json`` as a property; from 1.0 on it is a
    # method. Without this call the function would return a bound method,
    # which is always truthy and cannot be subscripted by the caller.
    if callable(data):
        try:
            data = data()
        except ValueError:
            # The old property returned None for a non-JSON body; keep that
            # contract so callers' ``if data:`` check still works.
            data = None
    return data
def process_results(data):
    """Store selected fields of a Crunchbase company record.

    Does nothing when ``data`` is falsy. Otherwise the 'name', 'overview',
    'tag_list', 'category_code' and 'permalink' entries of ``data`` are copied
    into a new document that is inserted into ``db.tagged_companies``. A falsy
    'tag_list' is stored as an empty string.

    :param data: mapping holding the company fields listed above, or a falsy
        value when there is nothing to store.
    """
    if not data:
        return
    record = {
        'name': data['name'],
        'overview': data['overview'],
        # Normalise a missing/empty tag list to "" before saving.
        'tag_list': data['tag_list'] or "",
        'category_code': data['category_code'],
        'permalink': data['permalink'],
    }
    db.tagged_companies.insert(record)
# Run the import only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment