Skip to content

Instantly share code, notes, and snippets.

@dalexander
Last active December 15, 2015 11:49
Show Gist options
  • Save dalexander/5256218 to your computer and use it in GitHub Desktop.
Save dalexander/5256218 to your computer and use it in GitHub Desktop.
projectIntoRange speedups
"""
Find coverage in [winStart, winEnd) implied by tStart, tEnd
vectors.
Original from rangeQueries, and two attempts at speeding it up.
In the common case I could imagine projectIntoRangeFast2 being fastest,
but on my amplicons test case projectIntoRangeFast1 wins.
"""
def projectIntoRange(tStart, tEnd, rangeStart, rangeEnd):
assert(len(tStart) == len(tEnd))
res = n.array([0] * (rangeEnd - rangeStart))
for i in range(0, len(tStart)):
s = max(rangeStart, tStart[i]) - rangeStart
e = min(rangeEnd, tEnd[i]) - rangeStart
if (e > s):
res[s:e] = res[s:e] + 1
return(res)
def projectIntoRangeFast1(tStart, tEnd, winStart, winEnd):
# An attempt at speeding up RQ.projectIntoRange
assert(len(tStart) == len(tEnd))
res = np.zeros(shape=winEnd-winStart, dtype=np.uint)
# Clip to window and translate
tStart_ = np.maximum(winStart, tStart) - winStart
tEnd_ = np.minimum(winEnd, tEnd) - winStart
for (s, e) in zip(tStart_, tEnd_):
res[s:e] += 1
return res
def projectIntoRangeFast2(tStart, tEnd, winStart, winEnd):
# FASTER
# Make the first differences, then integrate
assert(len(tStart) == len(tEnd))
deltas = np.zeros(shape=winEnd-winStart, dtype=np.int)
# Clip to window and translate
tStart_ = np.maximum(winStart, tStart) - winStart
tEnd_ = np.minimum(winEnd, tEnd) - winStart
for (s, e) in zip(tStart_, tEnd_):
deltas[s] += 1
deltas[e-1] -= 1
return np.cumsum(deltas)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment