Skip to content

Instantly share code, notes, and snippets.

@soh-i
Last active September 6, 2020 12:24
Show Gist options
  • Save soh-i/355464ce7373af0d94c5833c09766e08 to your computer and use it in GitHub Desktop.
Save soh-i/355464ce7373af0d94c5833c09766e08 to your computer and use it in GitHub Desktop.
Julia練習帳と質問
"""
    BlastRecord

Immutable container for one row of an 18-column, tab-separated BLAST result
table, with the fields stored in the same order as the columns are read.

The field names appear to follow the BLAST+ tabular output format specifiers
(`qseqid`, `sseqid`, `pident`, ...); consult the BLAST+ manual for the exact
meaning of each column.
"""
struct BlastRecord
    # Text columns 1-5.
    qseqid::String
    qseq::String
    sseqid::String
    sseq::String
    sstrand::String

    # Numeric columns 6-17.
    pident::Float64
    qlen::Int64
    length::Int64
    mismatch::Int64
    gapopen::Int64
    qstart::Int64
    qend::Int64
    sstart::Int64
    send::Int64
    gaps::Int64
    evalue::Float64
    bitscore::Float64

    # Text column 18.
    btop::String
end
"""
    generate_blast_records(blast_file) -> Dict{String, BlastRecord}

Read the tab-separated file `blast_file` and build one `BlastRecord` per line,
keyed by the first column (`qseqid`).

Each non-blank line must provide at least 18 tab-separated columns; columns
beyond the 18th are ignored. Blank (empty or whitespace-only) lines are skipped.
When several lines share the same `qseqid`, the last one read replaces the
earlier ones.

# Throws
- `BoundsError` if a non-blank line has fewer than 18 columns.
- `ArgumentError` if a numeric column cannot be parsed.
"""
function generate_blast_records(blast_file::AbstractString)::Dict{String, BlastRecord}
    blast_db = Dict{String, BlastRecord}()
    open(blast_file, "r") do fh
        for line in eachline(fh)
            # A blank line has no columns to index; skip it rather than abort the load.
            isempty(strip(line)) && continue

            cols = split(line, "\t")
            # Columns are passed positionally, in BlastRecord field order, so no
            # local has to shadow library names such as `length`.
            record = BlastRecord(
                cols[1],                   # qseqid
                cols[2],                   # qseq
                cols[3],                   # sseqid
                cols[4],                   # sseq
                cols[5],                   # sstrand
                parse(Float64, cols[6]),   # pident
                parse(Int64, cols[7]),     # qlen
                parse(Int64, cols[8]),     # length
                parse(Int64, cols[9]),     # mismatch
                parse(Int64, cols[10]),    # gapopen
                parse(Int64, cols[11]),    # qstart
                parse(Int64, cols[12]),    # qend
                parse(Int64, cols[13]),    # sstart
                parse(Int64, cols[14]),    # send
                parse(Int64, cols[15]),    # gaps
                parse(Float64, cols[16]),  # evalue
                parse(Float64, cols[17]),  # bitscore
                cols[18],                  # btop
            )
            blast_db[record.qseqid] = record
        end
    end
    return blast_db
end
# Load every record from the BLAST table; `@time` reports how long the load took.
blast_recs = @time generate_blast_records("OFF1_lib_R1_1.blast")

# Print each query id together with the record stored under it.
foreach(keys(blast_recs)) do key
    @show(key, blast_recs[key])
end
## CSVでDataFrame型を作成し、これをクエリできるようになった
# Practice module: loads BLAST tabular output and queries a sample sheet.
#
# NOTE(review): this module is itself named `Main`, the same name as Julia's
# top-level module; the name is kept so existing references keep working, but
# a distinct name would be less confusing.
module Main

using ArgParse
using CSV
using DataFrames
using Printf

"""
    BlastRecord

Immutable container for one row of an 18-column, tab-separated BLAST result
table, with the fields stored in the same order as the columns are read.

The field names appear to follow the BLAST+ tabular output format specifiers;
consult the BLAST+ manual for the exact meaning of each column.
"""
struct BlastRecord
    qseqid::String
    qseq::String
    sseqid::String
    sseq::String
    sstrand::String
    pident::Float64
    qlen::Int64
    length::Int64
    mismatch::Int64
    gapopen::Int64
    qstart::Int64
    qend::Int64
    sstart::Int64
    send::Int64
    gaps::Int64
    evalue::Float64
    bitscore::Float64
    btop::String
end

"""
    generate_blast_records(blast_file) -> Dict{String, BlastRecord}

Read the tab-separated file `blast_file` and build one `BlastRecord` per line,
keyed by the first column (`qseqid`).

Each non-blank line must provide at least 18 tab-separated columns; columns
beyond the 18th are ignored. Blank (empty or whitespace-only) lines are skipped.
When several lines share the same `qseqid`, the last one read replaces the
earlier ones.

# Throws
- `BoundsError` if a non-blank line has fewer than 18 columns.
- `ArgumentError` if a numeric column cannot be parsed.
"""
function generate_blast_records(blast_file::AbstractString)::Dict{String, BlastRecord}
    blast_db = Dict{String, BlastRecord}()
    open(blast_file, "r") do fh
        for line in eachline(fh)
            # A blank line has no columns to index; skip it rather than abort the load.
            isempty(strip(line)) && continue

            cols = split(line, "\t")
            # Columns are passed positionally, in BlastRecord field order, so no
            # local has to shadow library names such as `length`.
            record = BlastRecord(
                cols[1],                   # qseqid
                cols[2],                   # qseq
                cols[3],                   # sseqid
                cols[4],                   # sseq
                cols[5],                   # sstrand
                parse(Float64, cols[6]),   # pident
                parse(Int64, cols[7]),     # qlen
                parse(Int64, cols[8]),     # length
                parse(Int64, cols[9]),     # mismatch
                parse(Int64, cols[10]),    # gapopen
                parse(Int64, cols[11]),    # qstart
                parse(Int64, cols[12]),    # qend
                parse(Int64, cols[13]),    # sstart
                parse(Int64, cols[14]),    # send
                parse(Int64, cols[15]),    # gaps
                parse(Float64, cols[16]),  # evalue
                parse(Float64, cols[17]),  # bitscore
                cols[18],                  # btop
            )
            blast_db[record.qseqid] = record
        end
    end
    return blast_db
end

"""
    parse_command_line() -> Dict{String, Any}

Parse the process command line with ArgParse and return the parsed arguments.
A single required option, `--blast`, is declared.
"""
function parse_command_line()::Dict{String, Any}
    s = ArgParseSettings(prog="blast_parser.jl", description="Parsing blast output file")
    # NOTE(review): newer ArgParse releases spell this macro `@add_arg_table!`;
    # confirm against the installed version before switching.
    @add_arg_table s begin
        "--blast"
        help = "Blast output"
        required = true
    end
    return parse_args(s)
end

"""
    load_sample_sheet(file, delim) -> DataFrame

Read the delimited text file `file`, using `delim` as the column separator,
and return its contents as a `DataFrame`.
"""
function load_sample_sheet(file, delim)::DataFrame
    # `CSV.read` without a sink argument is rejected by current CSV.jl releases;
    # materialising a `CSV.File` through the `DataFrame` constructor avoids that.
    return DataFrame(CSV.File(file; delim=delim))
end

"""
    find_sample_by_id(sample_sheet, query) -> DataFrameRow

Return the first row of `sample_sheet` whose `:Sample_id` column equals `query`.

# Throws
- `ErrorException` if no row matches.
"""
function find_sample_by_id(sample_sheet::DataFrame, query::String)::DataFrameRow
    for sample in eachrow(sample_sheet)
        # `isequal` always yields a Bool; `==` would yield `missing` for a
        # missing cell and make the `if` throw.
        if isequal(sample[:Sample_id], query)
            return sample
        end
    end
    error("query \"$query\" not found in SampleSheet")
end

"""
    collect_sample_id_by_enzyme(sample_sheet, query) -> Vector{DataFrameRow}

Return every row of `sample_sheet` whose `:Enzyme` column equals `query`, in
sheet order.

# Throws
- `ErrorException` if no row matches.
"""
function collect_sample_id_by_enzyme(sample_sheet::DataFrame, query::String)::Vector{DataFrameRow}
    ret = Vector{DataFrameRow}()
    for sample in eachrow(sample_sheet)
        # `isequal` keeps rows with a missing enzyme from raising on comparison.
        if isequal(sample[:Enzyme], query)
            push!(ret, sample)
        end
    end
    isempty(ret) && error("no query $query found in SampleSheet")
    return ret
end

"""
    main()

Load the hard-coded sample sheet, check that sample `v3_QC_1` exists, and log
up to the first five rows whose enzyme is `TagRFP control`.

Command-line parsing and BLAST loading are not wired in here.
"""
function main()
    file_path = "09052020_SampleSheet.tsv"
    sample_sheet = load_sample_sheet(file_path, "\t")

    # Called only for its error-on-miss behaviour; the returned row is unused.
    q = "v3_QC_1"
    find_sample_by_id(sample_sheet, q)

    ids = collect_sample_id_by_enzyme(sample_sheet, "TagRFP control")
    # Clamp the slice so fewer than five matches cannot raise a BoundsError.
    first_ids = ids[1:min(5, length(ids))]
    @info "sample id:" first_ids
    return nothing
end

## main
main()

end
@soh-i
Copy link
Author

soh-i commented Sep 6, 2020

  • エラー処理の作法がいまいちわからない。例えば、なにか文字列処理をしていて結果を関数で返す場合、何もヒットがなかった場合が想定される場合は、関数はつねにUnion{T, Nothing}を返すべきなのかな?
  • 例外は`error`と`throw`があるがどっちを使うべき?これって関数のbodyの最後にpythonでいうところの`raise ValueError(...)`的な感じで使えばいいのかな
  • @infoマクロは便利だ。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment