Last active
January 28, 2021 20:38
-
-
Save caodac/8c9ba3bcced4050df52bcc8762e2a09f to your computer and use it in GitHub Desktop.
ARS regression test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import concurrent.futures
import json
import random
import sys
import threading

import requests
from jinja2 import Template
from neo4j import GraphDatabase
# Jinja2 template of the JSON query posted to the ARS; update this query
# accordingly. ``{{ disease }}`` is substituted per request.
QUERY = """{
  "message": {
    "query_graph": {
      "nodes": {
        "n00": {
          "id": "{{ disease }}",
          "category": "biolink:Disease"
        },
        "n01": {
          "category": "biolink:ChemicalSubstance"
        }
      },
      "edges": {
        "e00": {
          "subject": "n00",
          "object": "n01"
        }
      }
    }
  }
}
"""

# Endpoint the rendered query is POSTed to.
ars_url = "https://ars.transltr.io/ars/api/submit"
# Presumably the number of concurrent request threads -- confirm against
# the code that submits the queries.
num_threads = 2
# Compiled once at import time and reused for every request.
template = Template(QUERY)

# Neo4j Bolt endpoint and driver; the driver is created at import time.
uri = "bolt://disease.ncats.io:80"
driver = GraphDatabase.driver(uri, auth=("neo4j", ""))
def runquery(disease, timeout=1000):
    """Render the query for one disease, POST it to the ARS, return the response.

    Args:
        disease: Value substituted for ``{{ disease }}`` in the module-level
            query template.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The ``requests.Response`` object. The status code is only logged to
        stderr here; interpreting it is left to the caller.
    """
    query = template.render(disease=disease)
    headers = {'Content-Type': 'application/json'}
    print('searching for %s...' % disease, file=sys.stderr)
    # Send bytes so the body is explicitly UTF-8 rather than whatever the
    # underlying HTTP stack chooses for a str body.
    r = requests.post(ars_url, data=query.encode('utf-8'), headers=headers,
                      timeout=timeout)
    print('%d: %s' % (r.status_code, disease), file=sys.stderr)
    return r
def run_cypher(tx, query):
    """Run ``query`` on ``tx`` and return the ``MONDO`` value of every row.

    Args:
        tx: Object exposing ``run(query)`` that yields rows indexable by
            column name (used as a Neo4j transaction function argument).
        query: Query text passed unchanged to ``tx.run``.

    Returns:
        A list with ``row['MONDO']`` for each row, in result order.
    """
    return [row['MONDO'] for row in tx.run(query)]
def get_monogenic_diseases():
    """Fetch MONDO identifiers of descendants of ``MONDO:0000275`` from Neo4j.

    The query walks up to 10 ``R_subClassOf`` hops below the node whose
    payload id is ``MONDO:0000275`` (presumably the "monogenic disease"
    root -- confirm), keeps classes linked to a GARD entry, and optionally
    joins Orphanet gene associations.

    Returns:
        The ``MONDO`` column (``z.notation``) of every result row, as
        collected by ``run_cypher``.
    """
    # NOTE(review): rows are DISTINCT over all returned columns, so the same
    # MONDO value may appear more than once (e.g. several genes per
    # disease) -- confirm whether callers expect unique identifiers.
    CYPHER = """
    MATCH(n:S_MONDO)--(d:DATA) WHERE d.id='MONDO:0000275'
    WITH n,d MATCH (n)<-[e:R_subClassOf*1..10]-(m:S_MONDO)<-[:PAYLOAD]-(z:DATA)
    WITH m,z MATCH p=(d:DATA)-[:PAYLOAD]->(:S_GARD)-[:R_exactMatch|:R_equivalentClass]-(m)
    OPTIONAL MATCH q=(m)-[:R_exactMatch|:R_equivalentClass]-(:S_ORDO_ORPHANET)-[e:R_rel{name:'disease_associated_with_gene'}]->(:S_ORDO_ORPHANET)<-[:PAYLOAD]-(o:DATA)
    RETURN DISTINCT d.id as `GARD_ID`,d.name as `GARD_Disease`,z.notation as MONDO,e.DisorderGeneAssociationType as `Disease_Gene_Association`,o.symbol as `Gene_Symbol`,o.label as `Gene_Name`
    ORDER BY d.id
    """
    # The session is closed when the ``with`` block exits.
    with driver.session() as session:
        return session.read_transaction(run_cypher, CYPHER)
def dos_attack(vector, threads=2):
    """Submit every disease in ``vector`` concurrently and log the outcomes.

    One tab-separated line per disease is written to
    ``ars-regression-results.txt``: the disease, the HTTP status code and
    either the ``pk`` field of the JSON response (status 201) or the raw
    response text (any other status).

    Args:
        vector: Iterable of disease identifiers passed to ``runquery``.
        threads: Maximum number of worker threads.

    Raises:
        Whatever ``runquery`` raises for a request; it propagates out of the
        result loop, so later results are not written.
    """
    # The input is iterated twice (by zip and by executor.map), so make sure
    # it is a re-iterable sequence even when a generator is passed.
    vector = list(vector)
    with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
        # Explicit UTF-8 so non-ASCII response text does not depend on the
        # platform's default encoding.
        with open('ars-regression-results.txt', 'w', encoding='utf-8') as f:
            # executor.map yields results in input order, which keeps each
            # response aligned with its disease in the zip.
            for disease, r in zip(vector, executor.map(runquery, vector)):
                if r.status_code == 201:
                    detail = r.json()['pk']
                else:
                    detail = r.text
                print('%s\t%d\t%s' % (disease, r.status_code, detail), file=f)
if __name__ == "__main__":
    # Script entry point: pick SIZE random diseases and submit them.
    # A missing, non-integer or negative SIZE all end in the usage message
    # instead of a traceback or a silently odd slice.
    try:
        size = int(sys.argv[1])
    except (IndexError, ValueError):
        size = -1
    if size < 0:
        print('Usage: %s SIZE\nwhere SIZE is the number of diseases' % sys.argv[0])
        sys.exit(1)

    attack_vector = get_monogenic_diseases()
    # Shuffle so that each run exercises a different random sample.
    random.shuffle(attack_vector)
    # Use the module-level thread-count setting rather than relying on the
    # function default happening to match it.
    dos_attack(attack_vector[0:size], threads=num_threads)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment