Skip to content

Instantly share code, notes, and snippets.

@timbitz
Last active November 7, 2017 19:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save timbitz/cb728a9dcddf49f3c3a29e81102f752f to your computer and use it in GitHub Desktop.
Save timbitz/cb728a9dcddf49f3c3a29e81102f752f to your computer and use it in GitHub Desktop.
Bitwise parallel implementations of gc_content and hasambiguity
import BioSequences: index, offset, bitindex
function parallel_gc_content( seq::BioSequence )
gc = 0
s = bitindex(seq, 1)
e = bitindex(seq, length(seq))
for i in index(s):index(e)
@inbounds data = seq.data[i]
# mask nucleotides upstream of first(seq.part)
if i == index(s)
data &= 0xFFFFFFFFFFFFFFFF << offset(s)
end
# mask nucleotides downstream of last(seq.part)
if i == index(e)
data &= 0xFFFFFFFFFFFFFFFF >> (64 - (offset(e)+4))
end
# count unambiguous GC nucleotides
gc += count_ones(data & 0x6666666666666666)
end
return length(seq) > 0 ? gc / length(seq) : 0.0
end
function parallel_hasambiguity( seq::BioSequence )
nucs = 0
s = bitindex(seq, 1)
e = bitindex(seq, length(seq))
for i in index(s):index(e)
@inbounds data = seq.data[i]
# mask nucleotides upstream of first(seq.part)
if i == index(s)
data &= 0xFFFFFFFFFFFFFFFF << offset(s)
end
# mask nucleotides downstream of last(seq.part)
if i == index(e)
data &= 0xFFFFFFFFFFFFFFFF >> (64 - (offset(e)+4))
end
# count
block_ones = count_ones(data)
if block_ones > 16
return true
end
nucs += block_ones
end
return nucs > length(seq) ? true : false
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment