Skip to content

Instantly share code, notes, and snippets.

@xbrianh
Created July 12, 2019 20:52
Show Gist options
  • Save xbrianh/a444bb460fdb8c4564ea6a82a98ce8e0 to your computer and use it in GitHub Desktop.
Save xbrianh/a444bb460fdb8c4564ea6a82a98ce8e0 to your computer and use it in GitHub Desktop.
This script demonstrates a DSS Elasticsearch query that finds bundles by project UUID.
#!/usr/bin/env python
import argparse
from hca.dss import DSSClient
def get_dss_client(deployment):
    """Return a ``DSSClient`` configured for the given DSS deployment.

    Args:
        deployment: Deployment name. ``"prod"`` maps to the un-prefixed
            host ``dss.data.humancellatlas.org``; any other value is
            inserted as a subdomain, i.e. ``dss.<deployment>.data...``.

    Returns:
        A ``DSSClient`` constructed with the deployment's swagger URL.
    """
    # Production is the only deployment without a stage subdomain.
    if deployment == "prod":
        host = "dss.data.humancellatlas.org"
    else:
        host = f"dss.{deployment}.data.humancellatlas.org"
    return DSSClient(swagger_url=f"https://{host}/v1/swagger.json")
def find_bundles(client, replica, project_uuid):
    """Yield search hits for bundles that belong to a project.

    Builds an Elasticsearch ``bool``/``must``/``match`` query on the
    ``files.project_json.provenance.document_id`` field and passes it to
    ``client.post_search.iterate`` with ``output_format="raw"``.

    Args:
        client: Object exposing ``post_search.iterate(replica=...,
            es_query=..., output_format=...)`` (a ``DSSClient`` in this
            script).
        replica: Replica name forwarded unchanged to the search call.
        project_uuid: Value matched against the project document id.

    Yields:
        Each hit produced by the search iterator, unmodified and in the
        order the iterator returns them.
    """
    es_query = {
        "query": {
            "bool": {
                "must": [
                    {
                        "match": {
                            "files.project_json.provenance.document_id": project_uuid,
                        },
                    },
                ],
            },
        },
    }
    # Delegate directly to the search iterator so hits stay lazy.
    yield from client.post_search.iterate(
        replica=replica,
        es_query=es_query,
        output_format="raw",
    )
if __name__ == "__main__":
    # Command-line entry point: print one "<uuid>.<version>" line per hit.
    parser = argparse.ArgumentParser(
        description="List the bundles that reference a project UUID.",
    )
    parser.add_argument(
        "--deployment", "-d",
        choices=["dev", "integration", "staging", "prod"],
        default="dev",
        help="DSS deployment to query (default: %(default)s)",
    )
    parser.add_argument(
        "--replica", "-r",
        choices=["aws", "gcp"],
        default="aws",
        help="replica to search (default: %(default)s)",
    )
    parser.add_argument("project_uuid", help="project UUID to search for")
    args = parser.parse_args()

    client = get_dss_client(args.deployment)
    for hit in find_bundles(client, args.replica, args.project_uuid):
        # Raw hits carry the bundle uuid and its manifest version under
        # "metadata"; joined with a dot they form the printed identifier.
        metadata = hit["metadata"]
        print(f"{metadata['uuid']}.{metadata['manifest']['version']}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment