Create a gist now

Instantly share code, notes, and snippets.

@mneedham /blog.py
Last active Mar 28, 2017

What would you like to do?
Meetup API -> JSON -> CSV using Python's Luigi library
import csv
import json
import os
from collections import Counter

import luigi
import requests
from luigi.contrib.external_program import ExternalProgramTask
class Meetup(luigi.WrapperTask):
    """Top-level wrapper task that requires both CSV export tasks.

    Configuration is read from the environment when the requirements are
    resolved: ``MEETUP_API_KEY`` is mandatory, while ``LAT`` and ``LON``
    fall back to built-in defaults.
    """

    def requires(self):
        """Yield the group and member CSV tasks for the configured location."""
        # Indexing os.environ (instead of os.getenv) raises KeyError when the
        # API key is not set.
        api_key = os.environ['MEETUP_API_KEY']
        latitude = os.getenv('LAT', "51.5072")
        longitude = os.getenv('LON', "0.1275")
        for task_class in (GroupsToCSV, MembersToCSV):
            yield task_class(api_key, latitude, longitude)

    def run(self):
        """Print a marker line; this task produces no output of its own."""
        print("Running Meetup")
class GroupsToJSON(luigi.Task):
    """Download Meetup groups around a location and store them as JSON.

    The task first queries groups for the seed topic ``nosql``, counts the
    topic ``urlkey`` values attached to those groups, and then queries the
    groups of the ten most common topics. Groups are de-duplicated by their
    ``id`` and written to ``/tmp/groups.json``.

    Parameters:
        key: Meetup API key sent with every request.
        lat: latitude passed to the groups endpoint.
        lon: longitude passed to the groups endpoint.
    """

    key = luigi.Parameter()
    lat = luigi.Parameter()
    lon = luigi.Parameter()

    def _fetch_groups(self, topic):
        """Return the ``results`` list of the groups endpoint for *topic*.

        Raises:
            requests.HTTPError: if the API answers with an error status.
        """
        response = requests.get(
            "https://api.meetup.com/2/groups",
            # Passing the values via ``params`` lets requests URL-encode them
            # instead of splicing raw text into the query string.
            params={
                "topic": topic,
                "lat": self.lat,
                "lon": self.lon,
                "key": self.key,
            },
        )
        # Surface HTTP failures directly rather than as a later KeyError on
        # the missing "results" field.
        response.raise_for_status()
        return response.json()["results"]

    def run(self):
        """Collect the groups for the most common topics and dump them."""
        seed_topic = "nosql"
        topic_counts = Counter(
            topic["urlkey"]
            for group in self._fetch_groups(seed_topic)
            for topic in group["topics"]
        )
        topics = [urlkey for urlkey, _count in topic_counts.most_common(10)]

        # Keyed by group id so a group listed under several topics is kept once.
        groups = {}
        for topic in topics:
            for group in self._fetch_groups(topic):
                groups[group["id"]] = group

        with self.output().open('w') as groups_file:
            json.dump(list(groups.values()), groups_file, indent=4, sort_keys=True)

    def output(self):
        """Return the local target the groups JSON is written to."""
        return luigi.LocalTarget("/tmp/groups.json")
class GroupsToCSV(luigi.contrib.external_program.ExternalProgramTask):
    """Convert the groups JSON file to CSV by running ``./groups.sh``.

    The script is invoked with the path of the ``GroupsToJSON`` output and
    the path of this task's own output, in that order.
    """

    file_path = "/tmp/groups.csv"
    key = luigi.Parameter()
    lat = luigi.Parameter()
    lon = luigi.Parameter()

    def requires(self):
        """Yield the task that produces the groups JSON file."""
        yield GroupsToJSON(self.key, self.lat, self.lon)

    def output(self):
        """Return the local target for the generated CSV file."""
        return luigi.LocalTarget(self.file_path)

    def program_args(self):
        """Return the command line used to run the conversion script."""
        # requires() yields a single task, so the first input is the JSON file.
        json_target = self.input()[0]
        csv_target = self.output()
        return ["./groups.sh", json_target.path, csv_target.path]
class MembersToJSON(luigi.Task):
    """Fetch all members of one Meetup group and store them as a JSON list.

    Parameters:
        group_id: id of the group whose members are requested.
        key: Meetup API key sent with the first request.
    """

    group_id = luigi.IntParameter()
    key = luigi.Parameter()

    def output(self):
        """Return the per-group local target under ``/tmp/members/``."""
        return luigi.LocalTarget("/tmp/members/{0}.json".format(self.group_id))

    def run(self):
        """Page through the members endpoint and dump every result."""
        members = []
        next_uri = "https://api.meetup.com/2/members?&group_id={0}&key={1}".format(self.group_id, self.key)
        # Keep following the "next" link of each page; a falsy link ends the loop.
        while next_uri is not None:
            page = requests.get(next_uri).json()
            members.extend(page["results"])
            next_uri = page["meta"]["next"] or None

        with self.output().open("w") as members_file:
            json.dump(members, members_file)
class MembersToCSV(luigi.Task):
    """Flatten the members of every downloaded group into one CSV file.

    The task reads the groups JSON produced by ``GroupsToJSON``, requests a
    ``MembersToJSON`` task for each group as a dynamic dependency, and then
    writes one CSV row per member that has a ``name`` to ``/tmp/members.csv``.

    Parameters:
        key: Meetup API key forwarded to the upstream tasks.
        lat: latitude forwarded to ``GroupsToJSON``.
        lon: longitude forwarded to ``GroupsToJSON``.
    """

    out_path = "/tmp/members.csv"
    key = luigi.Parameter()
    lat = luigi.Parameter()
    lon = luigi.Parameter()

    def requires(self):
        """Yield the task that produces the groups JSON file."""
        yield GroupsToJSON(self.key, self.lat, self.lon)

    def output(self):
        """Return the local target for the combined members CSV."""
        return luigi.LocalTarget(self.out_path)

    def run(self):
        """Schedule the per-group member downloads, then write the CSV."""
        member_tasks = []
        for groups_target in self.input():
            with groups_target.open('r') as group_file:
                groups_json = json.load(group_file)
            # Pass the id through unchanged: MembersToJSON declares group_id
            # as an IntParameter, so it should not be stringified first.
            member_tasks.extend(
                MembersToJSON(group['id'], self.key) for group in groups_json
            )

        # Yield every dynamic dependency in one list. Yielding them one by
        # one makes luigi restart run() after each incomplete task.
        yield member_tasks

        with self.output().open("w") as output:
            writer = csv.writer(output, delimiter=",")
            writer.writerow(["id", "name", "joined", "topics", "groupId"])
            for task in member_tasks:
                with open(task.output().path) as json_data:
                    members = json.load(json_data)
                for member in members:
                    topic_ids = ";".join([str(topic["id"]) for topic in member["topics"]])
                    # Members without a "name" field are left out of the CSV.
                    if "name" in member:
                        writer.writerow([member["id"], member["name"], member["joined"], topic_ids, task.group_id])
#!/bin/bash
# Launch the Meetup luigi pipeline from the `blog` module with the local
# scheduler and three workers.

# Placeholder: replace with a real Meetup API key before running.
export MEETUP_API_KEY="<your-meetup-api-key>"

# PYTHONPATH="." lets luigi import the `blog` module from the current directory.
PYTHONPATH="." luigi --module blog --local-scheduler Meetup --workers 3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment