Skip to content

Instantly share code, notes, and snippets.

@trhura
Created November 8, 2013 05:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save trhura/7366740 to your computer and use it in GitHub Desktop.
Save trhura/7366740 to your computer and use it in GitHub Desktop.
Script to parse startup info from https://angel.co and save it into a CSV file.
# Author: Thura Hlaing <trhura@gmail.com>
# Time-stamp: <2013-11-08 12:11:30 (trhura)>
# Script to parse startup info from https://api.angel.co/1/startups?filter=raising
# and save it into a CSV file.
__author__ = "Thura Hlaing <trhura@gmail.com>"
import csv
import requests
import codecs
def parse_startup(json):
    """Flatten one startup record into a list of CSV cell values.

    Args:
        json: Mapping describing a single startup. It must contain the keys
            ``name``, ``angellist_url``, ``launch_date``, ``quality``,
            ``follower_count``, ``locations``, ``company_type`` and
            ``fundraising``. ``locations`` and ``company_type`` are iterables
            of mappings whose optional ``name`` entries are joined with a
            single space. ``fundraising`` is a mapping whose entries are all
            optional.

    Returns:
        A 14-item list in this order: name, angellist_url, launch_date,
        quality, follower_count, location names, company type names,
        round_opened_at, raising_amount, pre_money_valuation, discount,
        equity_basis, updated_at, raised_amount. Fundraising entries that
        are absent are represented by an empty string.

    Raises:
        KeyError: If one of the required top-level keys is missing.
    """
    fundraising_fields = (
        'round_opened_at',
        'raising_amount',
        'pre_money_valuation',
        'discount',
        'equity_basis',
        'updated_at',
        'raised_amount',
    )

    row = [
        json['name'],
        json['angellist_url'],
        json['launch_date'],
        json['quality'],
        json['follower_count'],
        # `or ''` keeps a null name from breaking str.join, which would
        # otherwise discard the whole row.
        " ".join(entry.get('name') or '' for entry in json['locations']),
        " ".join(entry.get('name') or '' for entry in json['company_type']),
    ]

    # Look the nested mapping up once instead of once per field.
    fundraising = json['fundraising']
    row.extend(fundraising.get(field, '') for field in fundraising_fields)
    return row
def main():
    """Download every page of raising startups and write them to a CSV file.

    Pages are requested from the AngelList startups endpoint starting at
    page 1. Each startup on a page is converted with ``parse_startup`` and
    written as one row of ``startupinfo.csv`` in the current directory.
    Paging stops once the current page reaches the ``last_page`` value
    reported by the response.

    Error handling is best effort:
      * a startup that cannot be parsed or written is reported and skipped;
      * a page that cannot be fetched or decoded is reported and retried,
        and after ``max_failures`` consecutive failures the run stops
        instead of requesting the same URL forever.
    """
    baseurl = 'https://api.angel.co/1/startups?filter=raising&page=%s'
    max_failures = 3  # consecutive failed attempts tolerated before giving up

    # NOTE(review): under Python 2 the csv module does not support unicode
    # input, so rows containing non-ASCII text presumably end up in the
    # per-row handler below and are skipped -- confirm against real data.
    with codecs.open("startupinfo.csv", mode="w", encoding='utf-8') as csvFile:
        writer = csv.writer(csvFile, delimiter=",", quotechar='"')
        current_page = 1
        failures = 0

        while True:
            url = baseurl % current_page
            print("Requesting url - %s " % url)

            # Fetch and validate the whole page before writing anything, so
            # a retry of the same page can never duplicate rows.
            try:
                # A timeout keeps a stalled connection from hanging the run.
                response = requests.get(url, timeout=30)
                payload = response.json()
                startups_json = payload["startups"]
                last_page = int(payload['last_page'])
            except Exception as ex:
                print(ex)
                failures += 1
                if failures >= max_failures:
                    break
                continue
            failures = 0

            for startup in startups_json:
                try:
                    row = parse_startup(startup)
                    writer.writerow(row)
                except Exception as ex:
                    # Best effort: report the bad record and keep going.
                    print(ex)
                    continue

            # ">=" also terminates when the API reports fewer pages than the
            # one just requested (for example an empty result set).
            if current_page >= last_page:
                break
            current_page += 1
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment