@harej
Created April 25, 2020 16:59
import json
from time import sleep
from pprint import pprint

import requests

# This is a quick script I came up with for ingesting "munged" Wikidata TTL
# dumps into Amazon Neptune, one at a time, going as fast as possible while
# respecting queue limits.

endpoint = "https://wdqs-01.cnftyxmevx0z.us-east-1.neptune.amazonaws.com:8182/loader"

for i in range(0, 4243):
    url = "s3://mungeout-202004/mungeOut/wikidump-00000" + str(i).zfill(4) + ".ttl.gz"
    postmsg = json.dumps({
        "source": url,
        "format": "turtle",
        "iamRoleArn": "arn:aws:iam::166460529035:role/s3toneptune",
        "region": "us-east-1",
        "failOnError": "TRUE",
        "parallelism": "HIGH",
        "queueRequest": "TRUE",
    })
    # Keep retrying the same file until the loader accepts it. When the load
    # queue is full (or any other error occurs), the response comes back
    # without a "200 OK" status, so wait a minute and try again.
    while True:
        r = requests.post(endpoint, data=postmsg, headers={"Content-Type": "application/json"})
        r = r.json()
        pprint(r)
        if r.get("status") == "200 OK":
            break
        sleep(60)