Skip to content

Instantly share code, notes, and snippets.

@mikesname
Last active November 7, 2017 13:33
Show Gist options
  • Save mikesname/3f03e09c8c2865cd3b80eb51351c5f85 to your computer and use it in GitHub Desktop.
Save mikesname/3f03e09c8c2865cd3b80eb51351c5f85 to your computer and use it in GitHub Desktop.
An example for fetching data from the EHRI GraphQL API and converting it to TSV
#!/usr/bin/env python3
# Fetch id, name, and history data for repositories
# and write as tab-separated values.
import sys, requests, csv, json
# Require the API endpoint URL as the first command-line argument.
# sys.argv[0] is always the script name, so a URL was only supplied
# when the list holds at least two entries.
if len(sys.argv) < 2:
    sys.stderr.write("usage: history.py <initial-api-url>\n")
    sys.exit(1)

# Fetch 50 items per request
FETCH_NUM = 50
# GraphQL endpoint that every page request is POSTed to.
URL = sys.argv[1]
# The actual GraphQL query. The number of items per request ($num)
# is a mandatory parameter (which we could also hard-code).
# The cursor ($cursor) is an optional parameter which defaults to null.
# Each page selects, per repository, its id plus the name and history
# of its description, and pageInfo.nextPage, which get_history() passes
# back in as the next $cursor.
QUERY = """
query getRepositoryHistory($num: Int!, $cursor: Cursor) {
page: repositories(first: $num, from: $cursor) {
items {
id
description {
name
history
}
}
pageInfo {
nextPage
}
}
}
"""
# Tab-separated writer bound to standard output; fields are quoted
# only when they need it.
csvwriter = csv.writer(
    sys.stdout,
    quoting=csv.QUOTE_MINIMAL,
    delimiter="\t",
)
# Emit the column header before any data rows.
csvwriter.writerow(("id", "name", "history"))
def get_history(page_num=1, cursor=None):
    """Fetch every page of repository histories and write them as TSV rows.

    Starting from ``cursor``, POST the GraphQL query to ``URL``, write one
    ``id``/``name``/``history`` row to ``csvwriter`` for each item that has
    a description with a non-null history, then follow
    ``pageInfo.nextPage`` until it is null.

    Args:
        page_num: Number shown in the progress message for the first page
            fetched; incremented for each following page.
        cursor: Cursor of the first page to fetch, or ``None`` to start
            from the beginning.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the response body carries no page data (for
            example because the GraphQL query was rejected).
    """
    headers = {"Content-type": "application/json"}

    # Loop instead of recursing: one stack frame per page would hit the
    # interpreter's recursion limit on a long result set.
    while True:
        sys.stderr.write("Fetching page: %d\n" % page_num)
        payload = {
            "query": QUERY,
            "variables": {"num": FETCH_NUM, "cursor": cursor},
        }
        response = requests.post(URL, data=json.dumps(payload), headers=headers)
        # Surface HTTP failures directly rather than as a confusing
        # JSON-decoding or key-lookup error further down.
        response.raise_for_status()

        body = response.json()
        result = body.get("data")
        page = result.get("page") if result else None
        if page is None:
            raise RuntimeError(
                "GraphQL query returned no data: %s"
                % json.dumps(body.get("errors"))
            )

        for item in page["items"]:
            description = item["description"]
            if description is None:
                continue
            history = description["history"]
            # Only items that actually have a history produce a row.
            if history is not None:
                csvwriter.writerow([item["id"], description["name"], history])

        cursor = page["pageInfo"]["nextPage"]
        if cursor is None:
            break
        page_num += 1
# Run the fetch, starting at page 1 with no cursor (the function defaults).
get_history()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment