Skip to content

Instantly share code, notes, and snippets.

@odinokov
Last active August 21, 2024 11:05
Show Gist options
  • Save odinokov/89085c9aa71c3f9afc06a1d4bc4fd33a to your computer and use it in GitHub Desktop.
Save odinokov/89085c9aa71c3f9afc06a1d4bc4fd33a to your computer and use it in GitHub Desktop.
The script fetches and prints genetic variants from the gnomAD database for a specified genomic region
# The script fetches and prints genetic variants from the gnomAD database for a specified genomic region.
# Adapted from https://gist.github.com/ressy/6fd7f6ee6401ac8e703dc2709399869e
import re
import sys
from pprint import PrettyPrinter
from typing import Any, Dict, Tuple
import fire
import requests
import tenacity
from loguru import logger
class GnomadClient:
    """Small client for querying variants from the gnomAD GraphQL API.

    Queries are sent as JSON POST requests to ``url``. Failed requests are
    retried up to ``max_retries`` attempts with a fixed ``retry_delay``
    (seconds) between attempts.
    """

    def __init__(
        self,
        url: str = "https://gnomad.broadinstitute.org/api",
        dataset: str = "gnomad_r4",
        reference_genome: str = "GRCh38",
        max_retries: int = 3,
        retry_delay: int = 2,
        verbose: bool = False,
        timeout: float = 60.0,
    ):
        """
        Stores the connection settings and configures logging.
        Parameters:
        - url: API endpoint that receives the POST requests
        - dataset: Dataset identifier interpolated into the query
        - reference_genome: Reference genome identifier interpolated into the query
        - max_retries: Maximum number of attempts per request
        - retry_delay: Fixed delay between attempts, in seconds
        - verbose: Log at DEBUG level instead of INFO
        - timeout: Per-request timeout in seconds passed to ``requests.post``
          (``None`` disables the timeout)
        """
        self.url = url
        self.dataset = dataset
        self.reference_genome = reference_genome
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.timeout = timeout
        self._configure_logging(verbose)

    def _configure_logging(self, verbose: bool) -> None:
        """
        Configures logging based on verbosity level.
        Note: this removes every previously registered loguru handler and
        installs a single colorized handler writing to stdout.
        """
        logger.remove()  # Remove the default logger configuration
        level = "DEBUG" if verbose else "INFO"
        # Add colorful logging output to the console
        logger.add(
            sys.stdout,
            level=level,
            colorize=True,
        )

    def fetch(self, jsondata: Dict[str, Any]) -> Dict[str, Any]:
        """
        Sends a POST request to the configured URL with the provided JSON data, with retry logic.
        Parameters:
        - jsondata: JSON-serializable request payload
        Returns:
        - The decoded JSON response.
        Raises:
        - The exception of the last failed attempt (HTTP error, timeout,
          or RuntimeError for errors reported inside the JSON response).
        """

        # The retry wrapper is built per call because its settings live on the instance.
        @tenacity.retry(
            wait=tenacity.wait_fixed(self.retry_delay),
            stop=tenacity.stop_after_attempt(self.max_retries),
            reraise=True,
        )
        def _make_request():
            logger.debug(f"Sending request to {self.url} with data: {jsondata}")
            response = requests.post(
                self.url,
                json=jsondata,
                headers={"Content-Type": "application/json"},
                # Without a timeout a stalled connection would block forever
                # and the retry logic would never get a chance to run.
                timeout=self.timeout,
            )
            response.raise_for_status()
            return self._handle_response(response)

        return _make_request()

    def _handle_response(self, response: "requests.Response") -> Dict[str, Any]:
        """
        Handles the response, checking for errors in the JSON data.
        Returns:
        - The decoded JSON body.
        Raises:
        - RuntimeError if the body contains an "errors" key.
        """
        response_json = response.json()
        if "errors" in response_json:
            # RuntimeError is still caught by callers that catch Exception.
            raise RuntimeError(str(response_json["errors"]))
        return response_json

    @staticmethod
    def parse_region(region: str) -> Tuple[str, int, int]:
        """
        Parses a genomic region string into chromosome, start, and stop positions using regular expressions.
        Removes a leading "chr" prefix before parsing.
        Parameters:
        - region: Genomic region in the format 'chromosome:start-stop'
        Returns:
        - Tuple of (chromosome, start, stop) positions.
        Raises:
        - ValueError if the region does not match the expected format.
        """
        # Strip only the literal "chr" prefix. str.lstrip("chr") would remove any
        # leading run of the characters c/h/r and mangle names such as "hs37d5".
        if region.startswith("chr"):
            region = region[len("chr"):]
        match = re.match(
            r"^(?P<chromosome>[^:]+):(?P<start>\d+)-(?P<stop>\d+)$", region
        )
        if not match:
            raise ValueError(
                f"Invalid region format: '{region}'. Expected format 'chromosome:start-stop'."
            )
        return (
            match.group("chromosome"),
            int(match.group("start")),
            int(match.group("stop")),
        )

    def get_variant_list(self, region: str) -> Any:
        """
        Fetches the list of variants for a given genomic region.
        Parameters:
        - region: Genomic region in the format 'chromosome:start-stop'
        Returns:
        - List of variants in the specified region.
        Raises:
        - ValueError if the region string cannot be parsed.
        """
        chrom, start, stop = self.parse_region(region)
        # Doubled braces are literal braces in the f-string (GraphQL selection sets).
        query = f"""
        {{
          region(chrom: "{chrom}", start: {start}, stop: {stop}, reference_genome: {self.reference_genome}) {{
            variants(dataset: {self.dataset}) {{
              consequence
              pos
              rsid
              variant_id: variantId
              exome {{
                ac
                an
                af
              }}
              genome {{
                ac
                an
                af
              }}
              allele: alt
              reference: ref
            }}
          }}
        }}
        """
        return self.fetch({"query": query, "variables": {}})["data"]["region"][
            "variants"
        ]
def main(
    region: str,
    dataset: str = "gnomad_r4",
    reference_genome: str = "GRCh38",
    max_retries: int = 3,
    retry_delay: int = 2,
    verbose: bool = False,
):
    """
    Fetches and prints genetic variants from the gnomAD database for a specified genomic region.
    Each variant is pretty-printed on its own; any error is logged rather than raised.
    Parameters:
    - region: Genomic region in the format 'chromosome:start-stop'
    - dataset: Dataset name (default: "gnomad_r4")
    - reference_genome: Reference genome (default: "GRCh38")
    - max_retries: Maximum number of retry attempts (default: 3)
    - retry_delay: Delay between retries in seconds (default: 2)
    - verbose: Verbose output for debugging (default: False)
    """
    try:
        client = GnomadClient(
            dataset=dataset,
            reference_genome=reference_genome,
            max_retries=max_retries,
            retry_delay=retry_delay,
            verbose=verbose,
        )
        variants = client.get_variant_list(region) or []
        if not variants:
            logger.info(f"No variants found in region '{region}'.")
        printer = PrettyPrinter(indent=2, width=40, depth=None, compact=False)
        # pprint() accepts exactly one object, so print the variants one by one
        # instead of unpacking the list into positional arguments.
        for variant in variants:
            printer.pprint(variant)
    except Exception as e:
        # Top-level CLI boundary: report the failure instead of a traceback.
        logger.error(f"An error occurred: {e}")


# Expose main() as a command-line interface when the file is run as a script.
if __name__ == "__main__":
    fire.Fire(main)
# Example. The same region in the gnomAD browser:
# https://gnomad.broadinstitute.org/region/12-67868595-67868595?dataset=gnomad_r4
# Command:
# fetch_gnomad.py --region chr12:67868595-67868595
# Output:
# { 'allele': 'T',
# 'consequence': 'intron_variant',
# 'exome': None,
# 'genome': { 'ac': 3,
# 'af': 1.974957538412924e-05,
# 'an': 151902},
# 'pos': 67868595,
# 'reference': 'C',
# 'rsid': 'rs945346526',
# 'variant_id': '12-67868595-C-T'}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment