Skip to content

Instantly share code, notes, and snippets.

@dufferzafar
Created April 22, 2016 21:05
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save dufferzafar/4018277a08d3f6de9e74f11a3f913e78 to your computer and use it in GitHub Desktop.
Save dufferzafar/4018277a08d3f6de9e74f11a3f913e78 to your computer and use it in GitHub Desktop.
Generate CSV for all selected GSoC 2016 projects. Output: https://gist.github.com/dufferzafar/efa128fca955d524be2907bb4d7347a6/
import csv
import cStringIO
import codecs
import glob
import json
import requests
def fetch_jsons():
    """Download project records page by page into ``projects/<id>.json``.

    Requests pages 1..125 (10 records per page) from the Summer of Code
    API and stops early at the first response that has no ``results`` key.
    Each record is dumped to its own pretty-printed, key-sorted JSON file.

    Raises:
        OSError: if the ``projects`` directory cannot be created.
        requests.exceptions.RequestException: on network failure or when
            the server does not answer within the timeout.
    """
    import os  # local import: ``os`` is not in the file's import block

    out_dir = 'projects'
    # Make sure the output directory exists; without it the first open()
    # below fails on a fresh checkout.  Written without ``exist_ok`` so it
    # also works on Python 2, which this script targets.
    try:
        os.makedirs(out_dir)
    except OSError:
        if not os.path.isdir(out_dir):
            raise

    url = ('https://summerofcode.withgoogle.com/api/program/current/project/'
           '?page=%d'
           '&page_size=10')

    # Hard upper bound of 125 pages; the loop normally ends earlier via the
    # 'results' check.
    for page in range(1, 126):
        # A timeout keeps one stalled connection from hanging the whole run.
        obj = requests.get(url % page, timeout=30).json()

        # Finished: the payload no longer carries a result list.
        if 'results' not in obj:
            print("Ending")
            break

        # Save each project into a separate json
        # (keeps all the data available locally)
        for result in obj['results']:
            fpath = 'projects/%s.json' % str(result['id'])
            print(fpath)

            with open(fpath, 'w') as out:
                json.dump(result, out, indent=4, sort_keys=True)
# Copied from SO!
# Why do I have to do this every time :/
class UnicodeCSVWriter(object):
    """CSV writer that emits rows of text to a stream in a chosen encoding.

    Each row is first formatted by a regular ``csv.writer`` into an
    in-memory staging buffer, then re-encoded with an incremental encoder
    and written to ``f``.  Because the encoder is incremental, a stateful
    encoding such as the default ``utf-8-sig`` emits its BOM only once, on
    the first row.

    Args:
        f: output stream; it receives already-encoded data, so it must
            accept byte strings (e.g. a file opened in binary mode).
        dialect: csv dialect forwarded to ``csv.writer``.
        encoding: name of the output encoding.
        **kwds: extra formatting options forwarded to ``csv.writer``.
    """

    # True on Python 2, where ``str`` is the byte-string type and the csv
    # module reads/writes bytes; False on Python 3, where csv works on text.
    _BYTES_CSV = str is bytes

    def __init__(self, f, dialect=csv.excel, encoding="utf-8-sig", **kwds):
        import io  # local import: ``io`` is not in the file's import block

        # Staging buffer for one formatted row.  newline="" stops StringIO
        # from translating the "\r\n" terminators csv produces.
        if self._BYTES_CSV:
            self.queue = io.BytesIO()
        else:
            self.queue = io.StringIO(newline="")
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        """Format one row, encode it and write it to the output stream.

        Text cells are written as-is; cells without an ``encode`` method
        (``None``, numbers, ...) are handed to the csv module unchanged,
        which renders ``None`` as an empty field.
        """
        if self._BYTES_CSV:
            # Byte-oriented csv: feed it UTF-8 and decode the finished line.
            cells = [s.encode("utf-8") if hasattr(s, "encode") else s
                     for s in row]
            self.writer.writerow(cells)
            data = self.queue.getvalue().decode("utf-8")
        else:
            self.writer.writerow(row)
            data = self.queue.getvalue()

        self.stream.write(self.encoder.encode(data))

        # Empty the staging buffer for the next row.  Rewind explicitly:
        # io buffers do not move the position on truncate().
        self.queue.seek(0)
        self.queue.truncate()

    def writerows(self, rows):
        """Write every row of ``rows`` via :meth:`writerow`."""
        for row in rows:
            self.writerow(row)
def generate_csv():
    """Build ``projects.csv`` from the JSON files under ``projects/``.

    One fully-quoted row is written per project file: title, student
    display name, organization name, and the assignee display names
    joined with ", ".  Files are processed in sorted path order so the
    output is reproducible across runs and filesystems.
    """
    # Binary mode: the writer hands the stream already-encoded data, and the
    # csv module emits its own "\r\n" terminators, which text mode would
    # mangle on Windows.  The ``with`` block guarantees the file is flushed
    # and closed even if a project file is malformed.
    with open("projects.csv", "wb") as out:
        outcsv = UnicodeCSVWriter(out, quoting=csv.QUOTE_ALL)

        for path in sorted(glob.glob('projects/*.json')):
            with open(path, 'r') as inp:
                project = json.load(inp)

            outcsv.writerow((
                project['title'],
                project['student']['display_name'],
                project['organization']['name'],
                ", ".join(project['assignee_display_names']),
            ))
if __name__ == "__main__":
    # Network step, disabled by default: enable the call below to download
    # the per-project JSON files before building the CSV.
    # fetch_jsons()

    # Offline step: turn the local JSON files into projects.csv.
    generate_csv()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment