Skip to content

Instantly share code, notes, and snippets.

@nerdstrike
Created January 15, 2020 17:04
Show Gist options
  • Save nerdstrike/881b36894b2663fcd493d73e9901be9a to your computer and use it in GitHub Desktop.
Save nerdstrike/881b36894b2663fcd493d73e9901be9a to your computer and use it in GitHub Desktop.
A one-file GraphQL server
import ariadne
from ariadne import QueryType, ObjectType, UnionType
import pymongo
from aiodataloader import DataLoader
from collections import defaultdict
# Connection settings for the MongoDB instance backing the API.
# The values are placeholders and must be replaced before running.
config = dict(
    host='IPADDRESS',
    port='PORT',
    user='USER',
    password='PASS',
    db='DB',
)
def connect(config):
    """Connect to MongoDB and return a handle to the configured database.

    Args:
        config: Mapping with the keys ``host``, ``port``, ``user``,
            ``password`` and ``db``. ``port`` may be an int or a numeric
            string; when it is missing, ``None`` is passed on to the driver.

    Returns:
        The ``pymongo`` database object named by ``config["db"]``.

    Raises:
        ValueError: If ``port`` is set but cannot be converted to an int.
    """
    port = config.get("port")
    if port is not None:
        # MongoClient only accepts an integer port, but values read from
        # config files or the environment usually arrive as strings.
        port = int(port)

    # Credentials are handed to the client itself: Database.authenticate()
    # was deprecated in PyMongo 3.x and removed in 4.0. With no explicit
    # authSource the driver authenticates against "admin", the same database
    # the previous explicit call targeted.
    # NOTE(review): the client connects lazily, so bad credentials surface on
    # the first operation rather than inside this function.
    client = pymongo.MongoClient(
        config.get("host"),
        port,
        username=config.get("user"),
        password=config.get("password"),
        read_preference=pymongo.ReadPreference.SECONDARY_PREFERRED,
    )
    return client[config.get("db")]
# Root query type plus one binding per object type a query may return,
# e.g. query { gene { ... }}
query = QueryType()
gene, transcript, exon = (
    ObjectType(type_name) for type_name in ('Gene', 'Transcript', 'Exon')
)

# Union type for results whose concrete type varies: looking up features
# by their location can yield different feature types at different times.
locus = UnionType('RegionResult')

# Open the MongoDB connection and pick the collection the resolvers read.
db = connect(config)
collection = db['graphql-test']
# Now we assign resolvers to each object type
# We only need resolvers where additional work is required beyond
# the result the top query returned
@query.field('gene')
def resolve_gene(_, info, name):
    """Resolve ``Query.gene``: look up the single Gene document called *name*.

    Returns whatever ``collection.find_one`` yields for the filter.
    """
    gene_filter = {"type": "Gene", "name": name}
    return collection.find_one(gene_filter)
# Open-ended, high-cardinality queries are a bad idea; this resolver is one
# such query.
@query.field('genes')
def resolve_genes(_, info):
    """Resolve ``Query.genes`` with the result of finding every Gene document."""
    return collection.find({"type": "Gene"})
@query.field('transcripts')
def resolve_transcripts(_, info):
    """Resolve ``Query.transcripts`` with the result of finding every Transcript document."""
    return collection.find({"type": "Transcript"})
@query.field('transcript')
def resolve_transcript(root, info, stable_id):
    """Resolve ``Query.transcript``: look up one Transcript by its stable ID.

    Returns whatever ``collection.find_one`` yields for the filter.
    """
    transcript_filter = {"type": "Transcript", "stable_id": stable_id}
    return collection.find_one(transcript_filter)
# Fetching many documents that reference documents will require a special
# kind of resolver. As MongoDB is not relational and we don't want to
# reinvent SQL, we will batch up requests for related documents.
# Without a "DataLoader" to do this, we would need one query per resolved gene.
# Instead we get one or more batched queries that deal with them en masse.
# You could do this in one query in SQL, but it would be odd to follow
# the resolver event loop of GraphQL when you already had everything you
# needed to select out the data you wanted
async def batch_load_fn(keys):
    """Fetch the transcripts of many genes with a single MongoDB query.

    DataLoader aggregates many single-ID requests into ``keys`` and expects
    one result per key, in the same order.

    Args:
        keys: Gene stable IDs collected by the DataLoader.

    Returns:
        A list parallel to ``keys``; each element is the (possibly empty)
        list of Transcript documents whose ``gene`` field equals that key.
    """
    # NOTE(review): collection.find is a synchronous pymongo call, so the
    # event loop is blocked while the query runs.
    selector = {
        "type": "Transcript",
        # $in does not care about order, so the keys are passed as given;
        # sorting them first could fail on keys that cannot be compared.
        "gene": {"$in": list(keys)},
    }

    # Bucket the bulk result by owning gene so it can be re-ordered below.
    transcripts_by_gene = defaultdict(list)
    for doc in collection.find(selector):
        transcripts_by_gene[doc["gene"]].append(doc)

    # Results must be returned in the order requested by 'keys'. Using .get()
    # keeps lookups for genes without transcripts from inserting new entries.
    return [transcripts_by_gene.get(gene_id, []) for gene_id in keys]
# Module-level DataLoader that hands batched keys to batch_load_fn.
# max_batch_size=1000 is passed to bound the size of a single batch.
# NOTE(review): created once at import time, so the same instance serves every
# request handled by this module -- confirm that sharing it is intended.
loader = DataLoader(
batch_load_fn=batch_load_fn,
max_batch_size=1000
)
@gene.field('transcripts')
def resolve_gene_transcripts(gene, info):
    """Resolve ``Gene.transcripts`` through the batching DataLoader.

    Args:
        gene: The parent Gene document; its ``stable_id`` is the batch key.
        info: GraphQL resolve info (unused).

    Returns:
        The value returned by ``loader.load`` for this gene's stable ID.
    """
    # The request is only handed to the DataLoader here; it decides when to
    # run batch_load_fn with this key and any others it has collected.
    return loader.load(key=gene["stable_id"])
@query.field('slice')
def resolve_slice(_, info, region, start, end, feature_type):
    """Resolve ``Query.slice``: features of one type inside a region window.

    Matches documents of ``feature_type`` on ``region`` whose start is
    strictly greater than ``start`` and whose end is strictly less than
    ``end``.
    """
    window_filter = {
        "type": feature_type,
        "slice.region.name": region,
        "slice.location.start": {'$gt': start},
        "slice.location.end": {'$lt': end},
    }
    return collection.find(window_filter)
@locus.type_resolver
def resolve_feature_slice(obj, *_):
    """Name the concrete GraphQL type of a ``RegionResult`` member.

    The document's own ``type`` field is returned as the type name.
    """
    feature_type = obj["type"]
    return feature_type
# Load the type definitions from the SDL file 'schema.sdl'.
# NOTE(review): the path is relative, so presumably it is resolved against the
# process working directory -- confirm how the server is launched.
schema_data = ariadne.load_schema_from_path('schema.sdl')
# Build the executable schema, handing over every binding object created
# above (query, gene, transcript, exon, locus) so that the resolvers
# registered on them are connected to the matching schema types.
schema = ariadne.make_executable_schema(
schema_data, query, gene, transcript, exon, locus
)
from ariadne.asgi import GraphQL
import notgood
# ASGI application that serves the schema exposed by the `notgood` module.
# NOTE(review): debug=True is hard-coded; presumably it should be turned off
# outside development -- confirm before deploying.
app = GraphQL(notgood.schema, debug=True)
type Query {
# Entry points of the API.
# gene: a single Gene looked up by its name.
# NOTE(review): declared non-null, but the resolver returns the result of a
# single-document lookup -- confirm what happens when no gene has this name.
gene(name: String!): Gene!
# genes: every Gene document (unbounded result).
genes: [Gene!]!
# transcripts: every Transcript document (unbounded result).
transcripts: [Transcript!]!
# transcript: a single Transcript looked up by its stable_id; nullable.
transcript(stable_id: String!): Transcript
# slice: features of type feature_type on the named region whose start is
# greater than `start` and whose end is less than `end`.
slice(region: String!, start: Int!, end: Int!, feature_type: String!): [RegionResult!]
}
type Gene {
# The commonly understood concept of a gene, with its various transcripts.
stable_id: String!
name: String!
so_term: String
# Transcripts whose `gene` field equals this gene's stable_id.
transcripts: [Transcript!]!
slice: Slice!
}
type Transcript {
# A Transcript of a Gene. Exons are listed in sequence order.
stable_id: String!
so_term: String
slice: Slice!
exons: [Exon!]!
}
type Exon {
# An Exon that is part of a Transcript.
stable_id: String!
slice: Slice!
}
type Slice {
# The container that combines a Region and a Location to define a precise
# locus. The `default` field states whether this is the definitive locus for
# the parent feature; `default: false` implies that another locus is
# considered more definitive.
region: Region!
location: Location!
default: Boolean
}
type Location {
# A locus associated with a Region. It defines a locality within the Region,
# perhaps corresponding to a genomic feature.
start: Int!
end: Int!
length: Int!
location_type: String!
}
type Region {
# A large contiguous block of assembled sequence, such as a whole chromosome.
# `name` and `assembly` are required; `strand` may be null.
name: String!
strand: Strand
assembly: String!
}
type Strand {
# The direction of reading sense with respect to the assembly:
# 1 = 5'->3', -1 = 3'->5'.
code: String
value: Int
}
# Result type of the `slice` query: each item is a Gene, a Transcript or an Exon.
union RegionResult = Gene | Transcript | Exon
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment