Skip to content

Instantly share code, notes, and snippets.

View cthoyt's full-sized avatar

Charles Tapley Hoyt cthoyt

View GitHub Profile
@cthoyt
cthoyt / find_missing_bananas.py
Created September 13, 2022 10:31
Find resources in the Bioregistry whose regular expression patterns contain a banana (i.e., a redundant prefix or prefix synonym) for curation.
import bioregistry
def main():
"""Identify resources with uncurated bananas."""
for resource in bioregistry.resources():
pattern = resource.get_pattern()
if not pattern:
continue
for peel in ":_-":
@cthoyt
cthoyt / extract_jcheminform_dois.py
Created August 25, 2022 11:59
Extract Journal of Cheminformatics DOIs from https://github.com/egonw/jcheminform-kb
"""Get Journal of Cheminformatics DOIs."""
import requests
import pystow
MODULE = pystow.module("jcheminf", "volumes")
USER = "egonw" # alternative use first-party jcheminform if it ever gets updated
REPO = "jcheminform-kb"
BRANCH = "main"
PREFIX = "https://doi.org/10.1186/"
@cthoyt
cthoyt / extract_obo_dependencies.py
Created July 5, 2022 17:04
Extract the dependencies of each OBO Foundry Ontology
"""Update the dependencies."""
import json
from pathlib import Path
from typing import Iterable, Optional
import bioontologies
import bioregistry
import click
from bioontologies.obograph import Graph
@cthoyt
cthoyt / make_author_list.py
Created June 2, 2022 10:41
Generate author list text from a Google Sheet, originally used for the Bioregistry paper.
"""Generate author list text from a Google Sheet."""
from operator import itemgetter
import click
import pandas as pd
ROLE = {"Lead": 0, "Senior": 2}
SUPERSCRIPTS = {
@cthoyt
cthoyt / ground_odisea.ipynb
Last active May 5, 2022 13:45
Ground ODiseA
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@cthoyt
cthoyt / bioregistry_potential_mappings.py
Created February 14, 2022 14:46
How many potential mappings are in the Bioregistry?
"""Calculate the number of possible mappings between bioregistry entries."""
from itertools import combinations
from bioregistry.external.getters import GETTERS
from bioregistry.version import VERSION
from humanize import intword
def main():
@cthoyt
cthoyt / wd-publication-ego.sparql
Created February 12, 2022 21:51
Make a graph of your papers, co-authors, and topics by querying Wikidata
#defaultView:Graph
SELECT ?s ?sLabel ?rgb ?edgelabel ?o ?oLabel
WHERE {
{
SELECT ?s ?sLabel ?rgb ?o ?oLabel
WHERE {
?o wdt:P50 wd:Q47475003, ?s .
FILTER (?s != wd:Q47475003)
OPTIONAL {
@cthoyt
cthoyt / pykeen_uncertainty_demo.ipynb
Last active January 5, 2022 11:01
PyKEEN Uncertainty Demo
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@cthoyt
cthoyt / download_reactome.py
Created November 3, 2021 20:08
Download and parse all of Reactome with PyBioPAX
from pathlib import Path
import pandas as pd
import pybiopax
ALL_PATHWAYS = "https://reactome.org/download/current/ReactomePathways.txt"
def download_reactome(directory):