Skip to content

Instantly share code, notes, and snippets.

@dionisos2
Created April 17, 2019 22:03
Show Gist options
  • Save dionisos2/b9992c9c3f7aaa6869834e551dd555b0 to your computer and use it in GitHub Desktop.
Save dionisos2/b9992c9c3f7aaa6869834e551dd555b0 to your computer and use it in GitHub Desktop.
using BenchmarkTools
# Alphabet of accepted nucleotide characters, in the order used for reporting counts.
# Declared `const` because `countnuc0` and `countnuc1` read this global inside their
# per-character loops; a non-constant global is untyped and forces dynamic dispatch there.
const NUCLEOTIDES = collect("ACGT")
"""
    build_countnuc(nucleotides)

Generate and return a counting function specialised for the alphabet `nucleotides`.

`nucleotides` is any iterable of characters (for example `"ACGT"` or `collect("ACGT")`).
The returned function takes one iterable argument and returns a `Vector` of
`nucleotide => count` pairs, in the order of `nucleotides`. It calls `error("plop")` on the
first element that is not part of the alphabet.

The code is assembled as an `Expr` with the alphabet spliced in as literal values rather
than as source text. Nothing from `nucleotides` is ever parsed as code, and characters that
would need escaping in source text (such as `'` or `\\`) are handled correctly.

Because the function is created with `eval`, a caller that builds and invokes it inside the
same function body must use `Base.invokelatest`.
"""
function build_countnuc(nucleotides)
    alphabet = collect(Char, nucleotides)
    # Labels for the result pairs. Interpolating the collection itself into source text
    # would insert its printed form (`['A', 'C', ...]`) and mislabel the counts.
    labels = join(alphabet)

    # Assemble the if/else chain from the innermost `else` outwards, so the first
    # alphabet entry ends up as the outermost (first tested) condition.
    chain = :(error("plop"))
    for i in reverse(eachindex(alphabet))
        chain = Expr(:if, :(n == $(alphabet[i])), :(freqtable[$i] += 1), chain)
    end

    kernel = :(vec -> begin
        freqtable = fill(0, $(length(alphabet)))
        for n in vec
            $chain
        end
        [nuc => number for (nuc, number) in zip($labels, freqtable)]
    end)
    return eval(kernel)
end
"""
    countnuc0(strand::AbstractString)

Count the occurrences of each entry of `NUCLEOTIDES` in `strand`.

Returns a `Dict` mapping every nucleotide to its count. Throws `DomainError` for the first
character of `strand` that is not in `NUCLEOTIDES`.
"""
function countnuc0(strand::AbstractString)
    # One mutable `[nucleotide, count]` entry per alphabet symbol.
    tallies = [[nuc, 0] for nuc in NUCLEOTIDES]
    for c in strand
        c ∈ NUCLEOTIDES || throw(DomainError(c))
        # Linear search for the entry belonging to this character.
        slot = findfirst(entry -> entry[1] == c, tallies)
        tallies[slot][2] += 1
    end
    return Dict(tallies)
end
"""
    countnuc1(vec)

Count the occurrences of each entry of `NUCLEOTIDES` among the elements of `vec`.

Returns a `Dict{Char,Int}` with one key per nucleotide. Calls `error("plop")` on the first
element that is not in `NUCLEOTIDES`.
"""
function countnuc1(vec)
    counts = Dict{Char,Int}()
    # Seed every nucleotide with zero so absent ones still appear in the result.
    for nuc in NUCLEOTIDES
        counts[nuc] = 0
    end
    for n in vec
        if !(n in NUCLEOTIDES)
            error("plop")
        end
        counts[n] += 1
    end
    return counts
end
# Counting function generated by `build_countnuc` for the `NUCLEOTIDES` alphabet;
# called as `countnuc2(sequence)`.
countnuc2 = build_countnuc(NUCLEOTIDES)
"""
    bench()

Time `countnuc0`, `countnuc1` and `countnuc2` with `@btime` on the same 4000-character
strand (1000 each of `A`, `C`, `G`, `T`). Prints the timings and returns `nothing`.
"""
function bench()
    longseq = join(base^1000 for base in ("A", "C", "G", "T"))
    @btime countnuc0($longseq)
    @btime countnuc1($longseq)
    @btime countnuc2($longseq)
    return nothing
end
#julia> bench()
# 1.205 ms (9978 allocations: 157.44 KiB)
# 95.718 μs (5 allocations: 560 bytes)
# 8.515 μs (5 allocations: 320 bytes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment