Skip to content

Instantly share code, notes, and snippets.

@endrebak
Created November 9, 2019 23:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save endrebak/615082362b7a392c74daca0cad760e8d to your computer and use it in GitHub Desktop.
Save endrebak/615082362b7a392c74daca0cad760e8d to your computer and use it in GitHub Desktop.
# wget http://big.databio.org/example_data/AIList/AIListTestData.tgz
# Maximum number of rows read from each BED file. Kept as an int because it is
# passed straight to pandas.read_csv(nrows=...), which documents an int.
nrows = 1_500_000
from ncls import NCLS
from ailist import AIList
import numpy as np
import pandas as pd
from time import time
# Query intervals: read only columns 1 and 2 of the tab-separated,
# header-less file as int64, capped at `nrows` rows.
query_file = "chainRn4.bed"
q = pd.read_csv(
    query_file,
    sep="\t",
    header=None,
    usecols=[1, 2],
    dtype=np.int64,
    nrows=nrows,
)
def time_ncls(s, q):
    """Time building an NCLS index from ``s`` and querying it with ``q``.

    Args:
        s: Subject frame; columns ``1`` and ``2`` and the index values are
            handed to the ``NCLS`` constructor (presumably interval starts,
            ends and ids -- confirm against the ncls documentation).
        q: Query frame; columns ``1`` and ``2`` and the index values are
            handed to ``all_overlaps_both``.

    Prints the build and query durations in seconds. The query result itself
    is discarded; nothing is returned.
    """
    # perf_counter is monotonic and high-resolution, which makes it the right
    # clock for benchmarks; imported locally so this function is self-contained.
    from time import perf_counter

    build_start = perf_counter()
    n = NCLS(s[1].values, s[2].values, s.index.values)
    build_end = perf_counter()
    print("Building NCLS took", build_end - build_start)

    # Take a fresh timestamp so the print above is not billed to the query.
    query_start = perf_counter()
    n.all_overlaps_both(q[1].values, q[2].values, q.index.values)
    query_end = perf_counter()
    print("Querying NCLS took", query_end - query_start)
def time_ai(s, q):
    """Time building an AIList from ``s`` and querying it with ``q``.

    Args:
        s: Subject frame; columns ``1`` and ``2``, the index values, and the
            index values cast to double are handed to ``AIList.from_array``
            (presumably starts, ends, ids and per-interval values -- confirm
            against the ailist documentation).
        q: Query frame; columns ``1`` and ``2`` and the index values are
            handed to ``intersect_from_array``.

    Prints the build and query durations in seconds. The build time covers
    ``AIList()``, ``from_array`` and ``construct``. The query result itself is
    discarded; nothing is returned.
    """
    # perf_counter is monotonic and high-resolution, which makes it the right
    # clock for benchmarks; imported locally so this function is self-contained.
    from time import perf_counter

    build_start = perf_counter()
    i = AIList()
    i.from_array(
        s[1].values,
        s[2].values,
        s.index.values,
        np.array(s.index.values, dtype=np.double),
    )
    i.construct()
    build_end = perf_counter()
    print("Building ai took", build_end - build_start)

    # Take a fresh timestamp so the print above is not billed to the query.
    query_start = perf_counter()
    i.intersect_from_array(q[1].values, q[2].values, q.index.values)
    query_end = perf_counter()
    print("Querying ai took", query_end - query_start)
# Benchmark every subject file against the single query frame `q` loaded above.
for subject_file in (
    "chainOrnAna1.bed",
    "chainVicPac2.bed",
    "chainXenTro3Link.bed",
    "chainMonDom5Link.bed",
):
    print("------" * 5)

    # Same column selection and row cap as used for the query file.
    s = pd.read_csv(
        subject_file,
        sep="\t",
        header=None,
        usecols=[1, 2],
        dtype=np.int64,
        nrows=nrows,
    )

    # Row counts are reported in millions.
    info_string = f"Subject file: {subject_file} ({len(s)/1e6}), Query file: {query_file} ({len(q)/1e6})"
    print(info_string)

    time_ncls(s, q)
    time_ai(s, q)
# Recorded output of this script (timings in seconds):
# ------------------------------
# Subject file: chainOrnAna1.bed (1.5), Query file: chainRn4.bed (1.5)
# Building NCLS took 0.37735605239868164
# Querying NCLS took 92.85288500785828
# Building ai took 0.3491032123565674
# Querying ai took 75.86743712425232
# ------------------------------
# Subject file: chainVicPac2.bed (1.5), Query file: chainRn4.bed (1.5)
# Building NCLS took 0.39214491844177246
# Querying NCLS took 92.57744908332825
# Building ai took 0.3954188823699951
# Querying ai took 73.98339509963989
# ------------------------------
# Subject file: chainXenTro3Link.bed (1.5), Query file: chainRn4.bed (1.5)
# Building NCLS took 0.3832888603210449
# Querying NCLS took 33.25918006896973
# Building ai took 0.25615978240966797
# Querying ai took 27.772515296936035
# ------------------------------
# Subject file: chainMonDom5Link.bed (1.5), Query file: chainRn4.bed (1.5)
# Building NCLS took 0.26062488555908203
# Querying NCLS took 10.006813049316406
# Building ai took 0.20964312553405762
# Querying ai took 4.56371283531189
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment