Last active
September 6, 2020 12:24
-
-
Save soh-i/355464ce7373af0d94c5833c09766e08 to your computer and use it in GitHub Desktop.
Julia練習帳と質問
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
    BlastRecord

One row of a tab-separated BLAST result table, with 18 fields stored in the
same order as the input columns.

The field names mirror BLAST+ `-outfmt` format specifiers; the per-field
descriptions below follow the BLAST+ documentation for those specifiers.
NOTE(review): confirm they match the `-outfmt` string used to produce the input.
"""
struct BlastRecord
    qseqid::String      # query sequence id
    qseq::String        # aligned part of the query sequence
    sseqid::String      # subject sequence id
    sseq::String        # aligned part of the subject sequence
    sstrand::String     # subject strand
    pident::Float64     # percentage of identical matches
    qlen::Int64         # query sequence length
    length::Int64       # alignment length
    mismatch::Int64     # number of mismatches
    gapopen::Int64      # number of gap openings
    qstart::Int64       # start of the alignment in the query
    qend::Int64         # end of the alignment in the query
    sstart::Int64       # start of the alignment in the subject
    send::Int64         # end of the alignment in the subject
    gaps::Int64         # total number of gaps
    evalue::Float64     # expect value
    bitscore::Float64   # bit score
    btop::String        # BLAST traceback operations string
end
"""
    generate_blast_records(blast_file::AbstractString) -> Dict{String, BlastRecord}

Read the tab-separated BLAST table at `blast_file` and return its rows as
`BlastRecord`s keyed by the first column (`qseqid`).

Blank lines and lines starting with `#` are skipped. Columns beyond the 18th
are ignored. When several rows share a `qseqid`, the last one read wins.

# Throws
- `ArgumentError` if a data line has fewer than 18 tab-separated columns; the
  message carries the file name and line number.
- Whatever `parse` throws when a numeric column cannot be parsed.
"""
function generate_blast_records(blast_file::AbstractString)::Dict{String, BlastRecord}
    ncols = 18  # number of BlastRecord fields / required columns per row
    blast_db = Dict{String, BlastRecord}()
    open(blast_file, "r") do fh
        for (lineno, line) in enumerate(eachline(fh))
            # Blank lines and `#` comment lines carry no record.
            (isempty(strip(line)) || startswith(line, "#")) && continue

            cols = split(line, "\t")
            if length(cols) < ncols
                throw(ArgumentError(
                    "$(blast_file):$(lineno): expected at least $(ncols) " *
                    "tab-separated columns, got $(length(cols))"))
            end

            # Columns are consumed positionally, in BlastRecord field order.
            # Building the record directly avoids locals named `length`/`send`.
            record = BlastRecord(
                cols[1],                   # qseqid
                cols[2],                   # qseq
                cols[3],                   # sseqid
                cols[4],                   # sseq
                cols[5],                   # sstrand
                parse(Float64, cols[6]),   # pident
                parse(Int, cols[7]),       # qlen
                parse(Int, cols[8]),       # length
                parse(Int, cols[9]),       # mismatch
                parse(Int, cols[10]),      # gapopen
                parse(Int, cols[11]),      # qstart
                parse(Int, cols[12]),      # qend
                parse(Int, cols[13]),      # sstart
                parse(Int, cols[14]),      # send
                parse(Int, cols[15]),      # gaps
                parse(Float64, cols[16]),  # evalue
                parse(Float64, cols[17]),  # bitscore
                cols[18],                  # btop
            )
            # A later row with the same qseqid overwrites the earlier one.
            blast_db[record.qseqid] = record
        end
    end
    return blast_db
end
# Parse the BLAST table (timing the call), then print every record with its key.
@time blast_recs = generate_blast_records("OFF1_lib_R1_1.blast")
for key in keys(blast_recs)
    @show(key, blast_recs[key])
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Build a DataFrame from a CSV/TSV sample sheet and query it.
# NOTE(review): this module is named `Main`, the same name as Julia's built-in
# top-level module; the name is kept here so existing references keep working,
# but renaming it would avoid confusion.
module Main

using ArgParse
using CSV
using DataFrames
using Printf

"""
    BlastRecord

One row of a tab-separated BLAST result table, with 18 fields stored in the
same order as the input columns.

The field names mirror BLAST+ `-outfmt` format specifiers; the per-field
descriptions below follow the BLAST+ documentation for those specifiers.
NOTE(review): confirm they match the `-outfmt` string used to produce the input.
"""
struct BlastRecord
    qseqid::String      # query sequence id
    qseq::String        # aligned part of the query sequence
    sseqid::String      # subject sequence id
    sseq::String        # aligned part of the subject sequence
    sstrand::String     # subject strand
    pident::Float64     # percentage of identical matches
    qlen::Int64         # query sequence length
    length::Int64       # alignment length
    mismatch::Int64     # number of mismatches
    gapopen::Int64      # number of gap openings
    qstart::Int64       # start of the alignment in the query
    qend::Int64         # end of the alignment in the query
    sstart::Int64       # start of the alignment in the subject
    send::Int64         # end of the alignment in the subject
    gaps::Int64         # total number of gaps
    evalue::Float64     # expect value
    bitscore::Float64   # bit score
    btop::String        # BLAST traceback operations string
end

"""
    generate_blast_records(blast_file::AbstractString) -> Dict{String, BlastRecord}

Read the tab-separated BLAST table at `blast_file` and return its rows as
`BlastRecord`s keyed by the first column (`qseqid`).

Blank lines and lines starting with `#` are skipped. Columns beyond the 18th
are ignored. When several rows share a `qseqid`, the last one read wins.

# Throws
- `ArgumentError` if a data line has fewer than 18 tab-separated columns; the
  message carries the file name and line number.
- Whatever `parse` throws when a numeric column cannot be parsed.
"""
function generate_blast_records(blast_file::AbstractString)::Dict{String, BlastRecord}
    ncols = 18  # number of BlastRecord fields / required columns per row
    blast_db = Dict{String, BlastRecord}()
    open(blast_file, "r") do fh
        for (lineno, line) in enumerate(eachline(fh))
            # Blank lines and `#` comment lines carry no record.
            (isempty(strip(line)) || startswith(line, "#")) && continue

            cols = split(line, "\t")
            if length(cols) < ncols
                throw(ArgumentError(
                    "$(blast_file):$(lineno): expected at least $(ncols) " *
                    "tab-separated columns, got $(length(cols))"))
            end

            # Columns are consumed positionally, in BlastRecord field order.
            # Building the record directly avoids locals named `length`/`send`.
            record = BlastRecord(
                cols[1],                   # qseqid
                cols[2],                   # qseq
                cols[3],                   # sseqid
                cols[4],                   # sseq
                cols[5],                   # sstrand
                parse(Float64, cols[6]),   # pident
                parse(Int, cols[7]),       # qlen
                parse(Int, cols[8]),       # length
                parse(Int, cols[9]),       # mismatch
                parse(Int, cols[10]),      # gapopen
                parse(Int, cols[11]),      # qstart
                parse(Int, cols[12]),      # qend
                parse(Int, cols[13]),      # sstart
                parse(Int, cols[14]),      # send
                parse(Int, cols[15]),      # gaps
                parse(Float64, cols[16]),  # evalue
                parse(Float64, cols[17]),  # bitscore
                cols[18],                  # btop
            )
            # A later row with the same qseqid overwrites the earlier one.
            blast_db[record.qseqid] = record
        end
    end
    return blast_db
end

"""
    parse_command_line() -> Dict{String, Any}

Parse the process command line with ArgParse. The only option is the required
`--blast`, described in its help text as the BLAST output.
"""
function parse_command_line()::Dict{String, Any}
    s = ArgParseSettings(prog="blast_parser.jl", description="Parsing blast output file")
    # NOTE(review): newer ArgParse releases spell this macro `@add_arg_table!`;
    # confirm against the installed version before switching.
    @add_arg_table s begin
        "--blast"
            help = "Blast output"
            required = true
    end
    return parse_args(s)
end

"""
    load_sample_sheet(file, delim) -> DataFrame

Read the delimited text file `file` into a `DataFrame`, using `delim` as the
column delimiter.
"""
function load_sample_sheet(file, delim)::DataFrame
    # `CSV.read` without a sink argument is rejected by current CSV.jl releases;
    # materialising a `CSV.File` works across versions.
    return DataFrame(CSV.File(file; delim=delim))
end

"""
    find_sample_by_id(sample_sheet::DataFrame, query::AbstractString) -> DataFrameRow

Return the first row of `sample_sheet` whose `Sample_id` column equals `query`.

# Throws
- `ErrorException` (via `error`) if no row matches.
"""
function find_sample_by_id(sample_sheet::DataFrame, query::AbstractString)::DataFrameRow
    for sample in eachrow(sample_sheet)
        # `isequal` instead of `==`: a `missing` cell must compare as "no match"
        # rather than propagate `missing` into the `if` and raise a TypeError.
        if isequal(sample[:Sample_id], query)
            return sample
        end
    end
    error("query \"$query\" not found in SampleSheet")
end

"""
    collect_sample_id_by_enzyme(sample_sheet::DataFrame, query::AbstractString) -> Vector{DataFrameRow}

Return every row of `sample_sheet` whose `Enzyme` column equals `query`, in
sheet order. Despite the name, whole rows are returned, not only sample ids.

# Throws
- `ErrorException` (via `error`) if no row matches.
"""
function collect_sample_id_by_enzyme(sample_sheet::DataFrame, query::AbstractString)::Vector{DataFrameRow}
    ret = Vector{DataFrameRow}()
    for sample in eachrow(sample_sheet)
        # `isequal` treats a `missing` cell as a non-match instead of throwing.
        if isequal(sample[:Enzyme], query)
            push!(ret, sample)
        end
    end
    isempty(ret) && error("no query $query found in SampleSheet")
    return ret
end

"""
    main()

Load the hard-coded sample sheet, check that sample `v3_QC_1` exists, and log
up to the first five rows whose enzyme is `TagRFP control`.
"""
function main()
    # BLAST parsing (parse_command_line + generate_blast_records) is not wired
    # in yet; only the sample-sheet queries run.
    file_path = "09052020_SampleSheet.tsv"
    sample_sheet = load_sample_sheet(file_path, "\t")

    # The row itself is not used; the call is kept because it raises when the
    # sample is absent from the sheet.
    find_sample_by_id(sample_sheet, "v3_QC_1")

    ids = collect_sample_id_by_enzyme(sample_sheet, "TagRFP control")
    # Clamp the slice so fewer than five matches do not raise a BoundsError.
    @info "sample id:" ids = ids[1:min(end, 5)]
    return nothing
end

## main
main()

end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
- `Union{T, Nothing}` を返すべきなのかな?
- `error` か `throw` があるがどっちを使うべき?これって関数のbodyの最後に、pythonでいうところの `raise ValueError(...)` 的な感じで使えばいいのかな?
- `@info` マクロは便利だ。