Skip to content

Instantly share code, notes, and snippets.

View dkoslicki's full-sized avatar

David Koslicki dkoslicki

View GitHub Profile
@dkoslicki
dkoslicki / TestBias.py
Created April 20, 2017 23:52
Assessing accuracy of Sourmash
import sourmash_lib
import numpy as np
import matplotlib.pyplot as plt
n = 10000 # sequence length
ksize = 10 # k-mer length
h = 5000 # number of hashes in sketch
i_range = range(1, 50000, 100) # range of intersection sizes
true_jaccards = np.zeros(len(i_range))
estimate_jaccards = np.zeros(len(i_range))
@dkoslicki
dkoslicki / JaccardEstimateViaContainment.py
Created April 24, 2017 17:11
Testing the typical bottom-k MinHash estimate of the Jaccard index versus the containment-based estimate of the Jaccard index
import sourmash_lib
import numpy as np
import matplotlib.pyplot as plt
from pybloom import BloomFilter
n = 10000 # sequence length
ksize = 10 # k-mer length
h = 100 # number of hashes in sketch
i_range = range(1, 50000, 500) # range of intersection sizes
#i_range = [10000]
@dkoslicki
dkoslicki / retrain_and_test_metalign.sh
Last active January 25, 2020 03:46
Example of how to re-train Metalign, make a small mock community, and then run Metalign on that mock community
#!/bin/bash
#set -e
# A basic end-to-end workflow to make sure things are working correctly (especially with respect to integration with CMash)
# basic steps:
# make small training database
# re-train CMash
# make a mock community
# use metalign to profile the mock community (results should be two strains corresponding to the two selected organisms)
# requires bbmap: https://sourceforge.net/projects/bbmap/
@dkoslicki
dkoslicki / dump_kmers.py
Last active January 25, 2020 01:07
Dump the k-mers in a CMash database
import sys, os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))+"/CMash/CMash")
from CMash import MinHash as MH
import itertools
training_database = sys.argv[1] # first input is the training file name
dump_file = sys.argv[2] # second input is the desired output dump file
CEs = MH.import_multiple_from_single_hdf5(training_database)
fid = open(dump_file, 'w')
i = 0
for CE in CEs:
@dkoslicki
dkoslicki / test_qg.json
Created April 20, 2020 15:42
Example query
{"_datetime": "2020-04-20 08:38:30", "code_description": "Normal completion", "context": "https://raw.githubusercontent.com/biolink/biolink-model/master/context.jsonld", "id": "https://arax.rtx.ai/api/rtx/v1/message/16", "knowledge_graph": {"edges": [], "nodes": []}, "log": [{"level": 20, "level_str": "INFO", "message": "ARAXQuery launching", "prefix": "2020-04-20 08:38:30.679141 INFO: ", "timestamp": "2020-04-20 08:38:30.679141"}, {"level": 20, "level_str": "INFO", "message": "Examine input query for needed information for dispatch", "prefix": "2020-04-20 08:38:30.679154 INFO: ", "timestamp": "2020-04-20 08:38:30.679154"}, {"level": 20, "level_str": "INFO", "message": "Found input processing plan. Sending to the ProcessingPlanExecutor", "prefix": "2020-04-20 08:38:30.679158 INFO: ", "timestamp": "2020-04-20 08:38:30.679158"}, {"level": 10, "level_str": "DEBUG", "message": "Entering executeProcessingPlan", "prefix": "2020-04-20 08:38:30.679161 DEBUG: ", "timestamp": "2020-04-20 08:38:30.679161"}, {"level": 10
pred = predictor(model_file=pkl_file) # your model file
single_X = [[0.024110625,-0.014302074,0.03327463,0.037940405,-0.008836642,0.016498972,-0.035753097,-0.018181683,-0.04282986,-0.00093017286,-0.020855421,0.09168679,-0.026489392,-0.000757988,0.015053533,-0.03811925,0.105790354,-0.15019746,0.005389204,0.065862186,-0.059054427,0.09367167,-0.07321083,-0.029161578,0.019454233,-0.0025663017,0.13445973,0.034153406,-0.045934483,-0.019593718,0.044405438,-0.0064168656,0.024581399,-0.02436311,-0.02830375,0.038942236,-0.025031557,-0.04817994,0.08156777,-0.006039464,0.03207281,0.010570812,-0.044887736,0.04389168,0.0083243875,0.02332488,0.079191886,-0.015065394,0.059166152,-0.00917501,-0.02219141,0.047573287,-0.0142929265,-0.04038168,-0.004716043,0.029420398,-0.009532481,0.014363899,-0.040293276,-0.026997436,0.117409654,-0.018956954,-0.052647393,0.004364208,0.062688805,-0.0042845616,-0.06180385,0.030208366,0.028081495,0.023202026,-0.009765206,0.021606075,0.004227908,0.043646853,0.011311999,0.06761212,-0.03812019,0.050
@dkoslicki
dkoslicki / BioASQ_with_identifiers.py
Created February 3, 2021 21:54
Simple code to get identifiers from BioASQ questions
import sys
sys.path.append("/home/dkoslicki/Desktop/RTX/code")
sys.path.append("/home/dkoslicki/Desktop/RTX/code/ARAX")
sys.path.append("/home/dkoslicki/Desktop/RTX/code/ARAX/ARAXQuery")
sys.path.append("/home/dkoslicki/Desktop/RTX/code/ARAX/ARAXQuery/Overlay")
sys.path.append("/home/dkoslicki/Desktop/RTX/code/ARAX/ARAXQuery/Overlay/predictor")
sys.path.append("/home/dkoslicki/Desktop/RTX/code/UI/Feedback")
sys.path.append("/home/dkoslicki/Desktop/RTX/code/reasoningtool")
sys.path.append("/home/dkoslicki/Desktop/RTX/code/reasoningtool/kg-construction")
sys.path.append("/home/dkoslicki/Desktop/RTX/code/UI/OpenAPI/python-flask-server")
@dkoslicki
dkoslicki / ARAX_commands.dsl
Created August 11, 2021 18:30
More complicated ARAX commands
add_qnode(key=n0,ids=NCBIGENE:1956,categories=biolink:Gene)
add_qnode(key=n1,categories=biolink:ChemicalSubstance)
add_qedge(key=e0,subject=n0,object=n1,predicates=[biolink:entity_negatively_regulates_entity,biolink:negatively_regulates_entity_to_entity, biolink:decreases_activity_of, biolink:decreases_expression_of,biolink:disrupts,biolink:increases_degradation_of,biolink:negatively_regulates])
# Cytotoxic node
add_qnode(key=n2,ids=UMLS:C1511636,categories=biolink:InformationContentEntity)
# Exclude drugs that are cytotoxic
add_qedge(key=e1,subject=n1,object=n2,exclude=true)
# FDA approved drug
add_qnode(key=n3,ids=MI:2099,categories=biolink:InformationContentEntity)
add_qedge(key=e2,subject=n1,object=n3)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@dkoslicki
dkoslicki / response.json
Last active May 17, 2022 20:11
Creative DTD answer
This file has been truncated, but you can view the full file.
{
"results": [
{
"node_bindings": {
"drug": [
{
"id": "CHEMBL.COMPOUND:CHEMBL1337"
}
],
"disease": [