Skip to content

Instantly share code, notes, and snippets.

@adamewing
Last active September 26, 2018 05:57
Show Gist options
  • Save adamewing/7fe6a71356abfb1881c20c620622f8e9 to your computer and use it in GitHub Desktop.
Save adamewing/7fe6a71356abfb1881c20c620622f8e9 to your computer and use it in GitHub Desktop.
gvcf coverage
#!/usr/bin/env julia
import Printf: @printf
import Base: parse
function contig_dict(vcf_fn)
ctgs = Dict{String,Int64}()
open(vcf_fn) do vcf
for line in eachline(vcf)
if occursin(r"^##contig=", line)
line = replace(line, r"<|>" => "")
line = replace(line, r"," => "=")
ctg_name = split(line, "=")[3]
ctg_len = parse(Int64, split(line, "=")[5])
ctgs[ctg_name] = ctg_len
end
if !occursin(r"^#", line)
return ctgs
end
end
end
ctgs
end
function main()
if length(ARGS) < 1
println("no VCF file passed as argument!")
exit(1)
end
ctgs = contig_dict(ARGS[1])
cover = Dict{String,Int64}()
open(ARGS[1]) do vcf
for line in eachline(vcf)
if !occursin(r"^#", line)
c = split(line, "\t")
chr = c[1]
pos = parse(Int64, c[2])
info = c[8]
stop = pos
for i in split(info, ";")
if occursin(r"^END=", i)
stop = parse(Int64, split(i, "=")[2])
end
end
cov_len = stop-pos+1
if haskey(cover, chr)
cover[chr] += cov_len
else
cover[chr] = cov_len
end
end
end
end
totalctg = 0
totalcov = 0
for chr in keys(ctgs)
totalctg += ctgs[chr]
if haskey(cover, chr)
totalcov += cover[chr]
end
end
totalfrac = Float64(totalcov)/Float64(totalctg)
@printf("%s\t%f\n", ARGS[1], totalfrac)
end
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment