nerdstrike/not_good.py

## not_good.py
import ariadne
from ariadne import QueryType, ObjectType, UnionType
import pymongo
from aiodataloader import DataLoader
from collections import defaultdict

# Connection settings read by connect(). The values below look like
# placeholders that have to be replaced with real deployment details.
# NOTE(review): 'port' is passed straight through as the second positional
# argument of pymongo.MongoClient -- presumably that must be an int rather
# than a string; confirm when filling this in.
config = {
    'host': 'IPADDRESS',
    'port': 'PORT',
    'user': 'USER',
    'password': 'PASS',
    'db': 'DB'
}


def connect(config):
    """Connect to MongoDB and return a handle to the configured database.

    Credentials are passed to the ``MongoClient`` constructor and checked
    against the ``admin`` database. This replaces the former
    ``client.admin.authenticate(user, password)`` call, which PyMongo
    deprecated in 3.5 and removed in 4.0. As a consequence authentication
    happens when the client first talks to the server rather than inside
    this function.

    Args:
        config: Mapping providing the keys "host", "port", "user",
            "password" and "db". Missing keys are read as None.

    Returns:
        The database object ``client[db]``. The client is created with a
        SECONDARY_PREFERRED read preference.
    """
    client = pymongo.MongoClient(
        config.get("host"),
        config.get("port"),
        username=config.get("user"),
        password=config.get("password"),
        # The original authenticated through client.admin, so keep using
        # the admin database as the credential source.
        authSource="admin",
        read_preference=pymongo.ReadPreference.SECONDARY_PREFERRED,
    )
    return client[config.get("db")]


# Establish the query entry point and the object types that resolvers can
# be attached to, e.g. query { gene { ... }}
query = QueryType()
gene = ObjectType('Gene')
transcript = ObjectType('Transcript')
exon = ObjectType('Exon')
# Union type: a single result may be any one of several feature types.
# Looking up features by their location can return different feature
# types from one request to the next.
locus = UnionType('RegionResult')

# Open the MongoDB connection at import time and select the collection
# that every resolver below queries
db = connect(config)
collection = db['graphql-test']


# Now we assign resolvers to each object type
# We only need resolvers where additional work is required beyond
# the result the top query returned
@query.field('gene')
def resolve_gene(_, info, name):
    """Resolve ``Query.gene``: look up one Gene document by its name.

    Returns whatever ``collection.find_one`` yields for the filter, i.e.
    the first matching document, or None when nothing matches.
    """
    gene_filter = {"type": "Gene", "name": name}
    return collection.find_one(gene_filter)


# Open ended high cardinality queries are a bad idea, here is one
@query.field('genes')
def resolve_genes(_, info):
    """Resolve ``Query.genes``: select every document of type "Gene".

    The result of ``collection.find`` is handed back untouched; no limit
    or paging is applied.
    """
    return collection.find({"type": "Gene"})


@query.field('transcripts')
def resolve_transcripts(_, info):
    """Resolve ``Query.transcripts``: select every "Transcript" document.

    The result of ``collection.find`` is handed back untouched; no limit
    or paging is applied.
    """
    return collection.find({"type": "Transcript"})


@query.field('transcript')
def resolve_transcript(root, info, stable_id):
    """Resolve ``Query.transcript``: look up one Transcript by stable id.

    Returns the result of ``collection.find_one`` for the filter, i.e. the
    first matching document, or None when nothing matches.
    """
    transcript_filter = {"type": "Transcript", "stable_id": stable_id}
    return collection.find_one(transcript_filter)


# Fetching many documents that reference documents will require a special
# kind of resolver. As MongoDB is not relational and we don't want to
# reinvent SQL, we will batch up requests for related documents.
# Without a "DataLoader" to do this, we would need one query per resolve
# Instead we get a 1+ queries that deal with them en-masse
# You could do this in one query in SQL, but it would be odd to follow
# the resolver event loop of GraphQL when you already had everything you
# needed to select out the data you wanted
async def batch_load_fn(keys):
    """Fetch the transcripts of many genes with a single MongoDB query.

    This is the batch function given to the module's DataLoader, which
    aggregates many single-id ``load`` requests into ``keys``.

    Args:
        keys: Gene stable ids, in the order the results must be returned.

    Returns:
        A list aligned with ``keys``: for each requested gene id, the list
        of Transcript documents whose "gene" value equals that id (an
        empty list when there are none).
    """
    # Not called "query": that name belongs to the module-level QueryType.
    transcript_filter = {
        "type": "Transcript",
        "gene": {'$in': sorted(keys)},
    }

    # Bucket each document under its parent gene while walking the cursor.
    # NOTE(review): collection.find looks like a blocking pymongo call made
    # from inside a coroutine -- confirm that is acceptable here.
    transcripts_by_gene = defaultdict(list)
    for doc in collection.find(transcript_filter):
        transcripts_by_gene[doc["gene"]].append(doc)

    # Results must come back in the order requested by 'keys'.
    return [transcripts_by_gene[gene_id] for gene_id in keys]


# Single module-level DataLoader used by resolve_gene_transcripts; it is
# configured with batch_load_fn and a maximum batch size of 1000 keys.
# NOTE(review): because it is created at import time, this instance (and
# any caching it performs) presumably outlives individual requests --
# confirm that is intended.
loader = DataLoader(
    batch_load_fn=batch_load_fn,
    max_batch_size=1000
)


@gene.field('transcripts')
def resolve_gene_transcripts(gene, info):
    """Resolve ``Gene.transcripts`` through the shared DataLoader.

    The parent gene's stable id is printed and then queued on ``loader``;
    the object returned by ``loader.load`` is passed back unchanged so the
    lookup can be batched with those of other genes.
    """
    stable_id = gene["stable_id"]
    print(stable_id)

    # Queue the key; DataLoader decides when the batch actually runs
    return loader.load(key=stable_id)


@query.field('slice')
def resolve_slice(_, info, region, start, end, feature_type):
    """Resolve ``Query.slice``: features of one type inside a region window.

    Selects documents of type ``feature_type`` on the named region whose
    location start is strictly greater than ``start`` and whose location
    end is strictly less than ``end``. The result of ``collection.find``
    is returned as-is.
    """
    window_filter = {
        "type": feature_type,
        "slice.region.name": region,
        # Both bounds are exclusive
        "slice.location.start": {'$gt': start},
        "slice.location.end": {'$lt': end},
    }
    return collection.find(window_filter)


@locus.type_resolver
def resolve_feature_slice(obj, *_):
    """Name the member of the RegionResult union that ``obj`` represents.

    The document's own "type" value is returned as the type name.
    """
    type_name = obj["type"]
    return type_name


# Load the type definitions for Ariadne from the file 'schema.sdl'
schema_data = ariadne.load_schema_from_path('schema.sdl')
# Then bind the query root, the object types and the union (with the
# resolvers registered on them above) to those definitions
schema = ariadne.make_executable_schema(
    schema_data, query, gene, transcript, exon, locus
)

## run_me_in_uvicorn.py
from ariadne.asgi import GraphQL
# The resolver module is the file not_good.py, so it must be imported as
# not_good; the former "import notgood" cannot be resolved.
import not_good

# ASGI application object to be served by uvicorn.
# NOTE(review): debug=True is kept from the original; confirm it is
# switched off outside development.
app = GraphQL(not_good.schema, debug=True)

## schema.sdl
# Entry points of the API. Each field has a resolver registered on the
# QueryType in not_good.py.
type Query {
  # One gene, looked up by its name
  gene(name: String!): Gene!
  # Every gene in the collection (no limit is applied by the resolver)
  genes: [Gene!]!
  # Every transcript in the collection (no limit is applied by the resolver)
  transcripts: [Transcript!]!
  # One transcript, looked up by its stable id
  transcript(stable_id: String!): Transcript
  # Features of the given type on a region whose start is greater than
  # 'start' and whose end is less than 'end' (both bounds exclusive)
  slice(region: String!, start: Int!, end: Int!, feature_type: String!): [RegionResult!]
}

type Gene {
  # The commonly understood concept of gene, with its various transcriptions.
  # stable_id is the value Transcript documents are matched against when
  # a gene's transcripts are loaded
  stable_id: String!
  name: String!
  # NOTE(review): presumably a Sequence Ontology term -- confirm
  so_term: String
  # Filled in by a dedicated batching resolver in not_good.py
  transcripts: [Transcript!]!
  slice: Slice!
}

type Transcript {
  # A Transcript of a Gene. Exons are listed in sequence order
  stable_id: String!
  # NOTE(review): presumably a Sequence Ontology term -- confirm
  so_term: String
  slice: Slice!
  # No custom resolver is registered for this field in not_good.py
  exons: [Exon!]!
}

type Exon {
  # An Exon that is part of a Transcript
  stable_id: String!
  # Where the exon is located (see Slice)
  slice: Slice!
}

type Slice {
  # The container that combines Region and Location to define a precise
  # locus. The 'default' key states whether this is the definitive locus
  # for the parent feature; default: false implies that another locus is
  # considered more definitive.
  region: Region!
  location: Location!
  default: Boolean
}

type Location {
  # A locus associated with a Region. It defines a locality within the
  # Region, perhaps corresponding to a genomic feature.
  # start and end are the values the slice query filters on
  start: Int!
  end: Int!
  length: Int!
  location_type: String!
}

type Region {
  # A large contiguous block of assembled sequence, such as a whole chromosome.
  # name is the value the slice query matches its 'region' argument against
  name: String!
  strand: Strand
  assembly: String!
}

type Strand {
  # The direction of reading sense w.r.t. the assembly.
  # 1 = 5'->3', -1 = 3'->5'
  code: String
  value: Int
}

# Result type of the slice query: each item is a Gene, a Transcript or an
# Exon; the server picks the member from the document's "type" value.
union RegionResult = Gene | Transcript | Exon
	import ariadne
	from ariadne import QueryType, ObjectType, UnionType
	import pymongo
	from aiodataloader import DataLoader
	from collections import defaultdict

	config = {
	'host': 'IPADDRESS',
	'port': 'PORT',
	'user': 'USER',
	'password': 'PASS',
	'db': 'DB'
	}


	def connect(config):
	    """Connect to MongoDB and return a handle to the configured database.

	    Credentials are passed to the ``MongoClient`` constructor and checked
	    against the ``admin`` database. This replaces the former
	    ``client.admin.authenticate(user, password)`` call, which PyMongo
	    deprecated in 3.5 and removed in 4.0. As a consequence authentication
	    happens when the client first talks to the server rather than inside
	    this function.

	    Args:
	        config: Mapping providing the keys "host", "port", "user",
	            "password" and "db". Missing keys are read as None.

	    Returns:
	        The database object ``client[db]``. The client is created with a
	        SECONDARY_PREFERRED read preference.
	    """
	    client = pymongo.MongoClient(
	        config.get("host"),
	        config.get("port"),
	        username=config.get("user"),
	        password=config.get("password"),
	        # The original authenticated through client.admin, so keep using
	        # the admin database as the credential source.
	        authSource="admin",
	        read_preference=pymongo.ReadPreference.SECONDARY_PREFERRED,
	    )
	    return client[config.get("db")]


	# Establish query entry point, and types that may be used in the query
	# e.g. query { gene { ... }}
	query = QueryType()
	gene = ObjectType('Gene')
	transcript = ObjectType('Transcript')
	exon = ObjectType('Exon')
	# Special type where data may contain different types of thing
	# In this case, looking up features by their location can include
	# different feature types at different times
	locus = UnionType('RegionResult')

	# Get the MongoDB connection and a collection to query
	db = connect(config)
	collection = db['graphql-test']


	# Now we assign resolvers to each object type
	# We only need resolvers where additional work is required beyond
	# the result the top query returned
	@query.field('gene')
	def resolve_gene(_, info, name):
	    """Resolve ``Query.gene``: look up one Gene document by its name.

	    Returns whatever ``collection.find_one`` yields for the filter, i.e.
	    the first matching document, or None when nothing matches.
	    """
	    result = collection.find_one({
	        "type": "Gene",
	        "name": name
	    })
	    return result


	# Open ended high cardinality queries are a bad idea, here is one
	@query.field('genes')
	def resolve_genes(_, info):
	    """Resolve ``Query.genes``: select every document of type "Gene".

	    The result of ``collection.find`` is handed back untouched; no limit
	    or paging is applied.
	    """
	    result = collection.find({
	        "type": "Gene"
	    })
	    return result


	@query.field('transcripts')
	def resolve_transcripts(_, info):
	    """Resolve ``Query.transcripts``: select every "Transcript" document.

	    The result of ``collection.find`` is handed back untouched; no limit
	    or paging is applied.
	    """
	    result = collection.find({
	        "type": "Transcript"
	    })
	    return result


	@query.field('transcript')
	def resolve_transcript(root, info, stable_id):
	    """Resolve ``Query.transcript``: look up one Transcript by stable id.

	    Returns the result of ``collection.find_one`` for the filter, i.e. the
	    first matching document, or None when nothing matches.
	    """
	    result = collection.find_one({
	        "type": "Transcript",
	        "stable_id": stable_id
	    })
	    return result


	# Fetching many documents that reference documents will require a special
	# kind of resolver. As MongoDB is not relational and we don't want to
	# reinvent SQL, we will batch up requests for related documents.
	# Without a "DataLoader" to do this, we would need one query per resolve
	# Instead we get a 1+ queries that deal with them en-masse
	# You could do this in one query in SQL, but it would be odd to follow
	# the resolver event loop of GraphQL when you already had everything you
	# needed to select out the data you wanted
	async def batch_load_fn(keys):
	    """Fetch the transcripts of many genes with a single MongoDB query.

	    This is the batch function given to the module's DataLoader, which
	    aggregates many single-id ``load`` requests into ``keys``.

	    Args:
	        keys: Gene stable ids, in the order the results must be returned.

	    Returns:
	        A list aligned with ``keys``: for each requested gene id, the list
	        of Transcript documents whose "gene" value equals that id (an
	        empty list when there are none).
	    """
	    # Not called "query": that name belongs to the module-level QueryType.
	    transcript_filter = {
	        "type": "Transcript",
	        "gene": {'$in': sorted(keys)},
	    }

	    # Bucket each document under its parent gene while walking the cursor.
	    transcripts_by_gene = defaultdict(list)
	    for doc in collection.find(transcript_filter):
	        transcripts_by_gene[doc["gene"]].append(doc)

	    # Results must come back in the order requested by 'keys'.
	    return [transcripts_by_gene[gene_id] for gene_id in keys]


	loader = DataLoader(
	batch_load_fn=batch_load_fn,
	max_batch_size=1000
	)


	@gene.field('transcripts')
	def resolve_gene_transcripts(gene, info):
	    """Resolve ``Gene.transcripts`` through the shared DataLoader.

	    The parent gene's stable id is printed and then queued on ``loader``;
	    the object returned by ``loader.load`` is passed back unchanged so the
	    lookup can be batched with those of other genes.
	    """
	    gene_stable_id = gene["stable_id"]
	    print(gene_stable_id)

	    # Queue the key; DataLoader decides when the batch actually runs
	    result = loader.load(
	        key=gene_stable_id
	    )
	    return result


	@query.field('slice')
	def resolve_slice(_, info, region, start, end, feature_type):
	    """Resolve ``Query.slice``: features of one type inside a region window.

	    Selects documents of type ``feature_type`` on the named region whose
	    location start is strictly greater than ``start`` and whose location
	    end is strictly less than ``end``. The result of ``collection.find``
	    is returned as-is.
	    """
	    result = collection.find({
	        "type": feature_type,
	        "slice.region.name": region,
	        # Both bounds are exclusive
	        "slice.location.start": {'$gt': start},
	        "slice.location.end": {'$lt': end}
	    })

	    return result


	@locus.type_resolver
	def resolve_feature_slice(obj, *_):
	    """Name the member of the RegionResult union that ``obj`` represents.

	    The document's own "type" value is returned as the type name.
	    """
	    return obj["type"]


	# Give the schema to Ariadne
	schema_data = ariadne.load_schema_from_path('schema.sdl')
	# Then assign the various "types" for it to connect with resolvers
	schema = ariadne.make_executable_schema(
	schema_data, query, gene, transcript, exon, locus
	)
	from ariadne.asgi import GraphQL
	# The resolver module is the file not_good.py, so it must be imported as
	# not_good; the former "import notgood" cannot be resolved.
	import not_good

	# ASGI application object to be served by uvicorn.
	# NOTE(review): debug=True is kept from the original; confirm it is
	# switched off outside development.
	app = GraphQL(not_good.schema, debug=True)
	type Query {
	gene(name: String!): Gene!
	genes: [Gene!]!
	transcripts: [Transcript!]!
	transcript(stable_id: String!): Transcript
	slice(region: String!, start: Int!, end: Int!, feature_type: String!): [RegionResult!]
	}

	type Gene {
	# The commonly understood concept of gene, with its various transcriptions.
	stable_id: String!
	name: String!
	so_term: String
	transcripts: [Transcript!]!
	slice: Slice!
	}

	type Transcript {
	# A Transcript of a Gene. Exons are listed in sequence order
	stable_id: String!
	so_term: String
	slice: Slice!
	exons: [Exon!]!
	}

	type Exon {
	# An Exon that is part of a Transcript
	stable_id: String!
	slice: Slice!
	}

	type Slice {
	#The container that combines Region and Location together to define a precise locus. The 'default' key defines whether this is the definitive locus for the parent feature. default:False implies there is another locus which is considered more definitive
	region: Region!
	location: Location!
	default: Boolean
	}

	type Location {
	# A locus associated with a Region. It defines a locality within the Region perhaps corresponding to genomic feature
	start: Int!
	end: Int!
	length: Int!
	location_type: String!
	}

	type Region {
	# A large contiguous block of assembled sequence, such as a whole chromosome.
	name: String!
	strand: Strand
	assembly: String!
	}

	type Strand {
	# The direction of reading sense w.r.t. the assembly. 1 = 5'->3', -1 = 3'-5'
	code: String
	value: Int
	}

	# Result type of the slice query: each item is a Gene, a Transcript or
	# an Exon. The member separators are plain "|" characters; the
	# backslash-escaped form is not valid SDL.
	union RegionResult = Gene | Transcript | Exon