Created
November 9, 2019 23:18
-
-
Save endrebak/615082362b7a392c74daca0cad760e8d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# wget http://big.databio.org/example_data/AIList/AIListTestData.tgz
# Maximum number of rows read from each BED file. Kept as an int (not the
# float 1.5e6) because it is passed straight to pd.read_csv(nrows=...),
# which is documented as taking an integer.
nrows = 1_500_000
from ncls import NCLS | |
from ailist import AIList | |
import numpy as np | |
import pandas as pd | |
from time import time | |
# Query intervals: columns 1 and 2 of the tab-separated, header-less file,
# parsed as int64 and capped at nrows rows.
query_file = "chainRn4.bed"
q = pd.read_csv(
    query_file,
    sep="\t",
    header=None,
    usecols=[1, 2],
    nrows=nrows,
    dtype=np.int64,
)
def time_ncls(s, q):
    """Print how long NCLS takes to index ``s`` and to query it with ``q``.

    Args:
        s: Subject DataFrame; columns 1 and 2 and the row index are passed
            to the ``NCLS`` constructor.
        q: Query DataFrame; columns 1 and 2 and the row index are passed to
            ``all_overlaps_both``.

    The elapsed times (in seconds) are printed; nothing is returned.
    """
    # perf_counter is monotonic and high-resolution, unlike wall-clock
    # time(), so the measured intervals cannot be skewed by clock changes.
    from time import perf_counter

    build_start = perf_counter()
    n = NCLS(s[1].values, s[2].values, s.index.values)
    build_end = perf_counter()
    print("Building NCLS took", build_end - build_start)

    # Take a fresh timestamp after printing so console I/O is not counted
    # as part of the query time.
    query_start = perf_counter()
    n.all_overlaps_both(q[1].values, q[2].values, q.index.values)
    query_end = perf_counter()
    print("Querying NCLS took", query_end - query_start)
def time_ai(s, q):
    """Print how long AIList takes to index ``s`` and to query it with ``q``.

    Args:
        s: Subject DataFrame; columns 1 and 2, the row index, and the row
            index converted to doubles are passed to ``AIList.from_array``.
        q: Query DataFrame; columns 1 and 2 and the row index are passed to
            ``intersect_from_array``.

    The elapsed times (in seconds) are printed; nothing is returned.
    """
    # perf_counter is monotonic and high-resolution, unlike wall-clock
    # time(), so the measured intervals cannot be skewed by clock changes.
    from time import perf_counter

    build_start = perf_counter()
    ail = AIList()
    ail.from_array(
        s[1].values,
        s[2].values,
        s.index.values,
        np.array(s.index.values, dtype=np.double),
    )
    # Building covers both loading the arrays and the construct() call.
    ail.construct()
    build_end = perf_counter()
    print("Building ai took", build_end - build_start)

    # Take a fresh timestamp after printing so console I/O is not counted
    # as part of the query time.
    query_start = perf_counter()
    ail.intersect_from_array(q[1].values, q[2].values, q.index.values)
    query_end = perf_counter()
    print("Querying ai took", query_end - query_start)
# Run both benchmarks once per subject file, always against the same query
# intervals held in q.
subject_files = "chainOrnAna1.bed chainVicPac2.bed chainXenTro3Link.bed chainMonDom5Link.bed".split()
for subject_file in subject_files:
    print("------" * 5)
    # Same parsing as the query file: columns 1 and 2, int64, at most nrows rows.
    s = pd.read_csv(
        subject_file,
        sep="\t",
        header=None,
        usecols=[1, 2],
        nrows=nrows,
        dtype=np.int64,
    )
    # Row counts are reported in millions.
    info_string = f"Subject file: {subject_file} ({len(s)/1e6}), Query file: {query_file} ({len(q)/1e6})"
    print(info_string)
    # To inspect the parsed input, print s.head() and s.dtypes here.
    time_ncls(s, q)
    time_ai(s, q)
# ------------------------------
# Subject file: chainOrnAna1.bed (1.5), Query file: chainRn4.bed (1.5)
# Building NCLS took 0.37735605239868164
# Querying NCLS took 92.85288500785828
# Building ai took 0.3491032123565674
# Querying ai took 75.86743712425232
# ------------------------------
# Subject file: chainVicPac2.bed (1.5), Query file: chainRn4.bed (1.5)
# Building NCLS took 0.39214491844177246
# Querying NCLS took 92.57744908332825
# Building ai took 0.3954188823699951
# Querying ai took 73.98339509963989
# ------------------------------
# Subject file: chainXenTro3Link.bed (1.5), Query file: chainRn4.bed (1.5)
# Building NCLS took 0.3832888603210449
# Querying NCLS took 33.25918006896973
# Building ai took 0.25615978240966797
# Querying ai took 27.772515296936035
# ------------------------------
# Subject file: chainMonDom5Link.bed (1.5), Query file: chainRn4.bed (1.5)
# Building NCLS took 0.26062488555908203
# Querying NCLS took 10.006813049316406
# Building ai took 0.20964312553405762
# Querying ai took 4.56371283531189
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment