Last active
March 17, 2023 16:55
-
-
Save epaule/ea93cb6bdf625e5293f5713b90fe1da9 to your computer and use it in GitHub Desktop.
Rough parser for an ASG contamination file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env crystal
# Rough filter for the tables collected by an ASG cobiont/contamination check.
#
# Two reports are written to standard output:
#   1. every line mentioning "VecScreen" in each *vecscreen_contamination file;
#   2. for each *contamination_check_merged_table.csv file, the first column of
#      every row that neither matches the host phylum pattern nor ends in a
#      "no-hit" classifier, grouped by the value of the last column.
require "option_parser"

# Defaults; both can be overridden on the command line.
phylum = "Arthropoda|insect"
dir = "20230226_qqAmaFero1.20230225.haplotigs.fa_asg_cobiont_check_run/collected_tables/"

OptionParser.parse do |parser|
  parser.banner = "Usage: filter_merged --phylum xyz --infile <in.merged>"
  parser.on("-p PHYLUM", "--phylum=PHYLUM", "Specifies the phylum(s) of the host separated by | [default=#{phylum}]") do |value|
    phylum = value
  end
  parser.on("-d directory", "--directory=DIR", "merged ASG directory[default=#{dir}]") do |value|
    dir = value
  end
  parser.on("-h", "--help", "Show this help") do
    puts parser
    exit
  end
  parser.invalid_option do |flag|
    STDERR.puts "ERROR: #{flag} is not a valid option."
    STDERR.puts parser
    exit(1)
  end
end

# Report 1: adaptor screen hits.
Dir.glob("#{dir}/*vecscreen_contamination").each do |path|
  vec_hits = File.read_lines(path).select { |text| text =~ /VecScreen/ }
  next if vec_hits.empty?

  puts "========== EUKARYOTE ADAPTOR SCREEN =========="
  vec_hits.each { |hit| puts hit }
  # Blank separator line after the hits.
  puts ""
end

# Report 2: scaffolds to remove, grouped by the last CSV column.
Dir.glob("#{dir}/*contamination_check_merged_table.csv").each do |path|
  by_category = Hash(String, Array(String)).new

  File.read_lines(path).each_with_index do |row, index|
    # The first line of the table is a header.
    next if index.zero?
    # Rows matching the host pattern anywhere on the line are kept (not reported).
    next if row =~ /#{phylum}/

    fields = row.split(',')
    category = fields[-1]
    next if category =~ /no-hit/

    by_category[category] ||= [] of String
    by_category[category] << fields[0]
  end

  by_category.each do |category, scaffolds|
    puts "##{category}"
    scaffolds.each do |scaffold|
      puts "REMOVE\t#{scaffold}"
    end
    puts ""
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env ruby
# frozen_string_literal: true

# Rough filter for the tables collected by an ASG cobiont/contamination check.
#
# Two reports are written to standard output:
#   1. every line mentioning "VecScreen" in each *vecscreen_contamination file;
#   2. for each *contamination_check_merged_table.csv file, the first column of
#      every row that neither matches the host phylum pattern nor ends in a
#      "no-hit" classifier, grouped by the value of the last column.
require "optparse"

# Collects the lines of a vecscreen report that mention "VecScreen".
#
# @param path [String] path to a *vecscreen_contamination file
# @return [Array<String>] matching lines, without line terminators
def vecscreen_hits(path)
  # File.foreach closes the file once iteration finishes.
  File.foreach(path, chomp: true).grep(/VecScreen/)
end

# Groups the scaffolds of a merged contamination table by classifier.
#
# The first line is treated as a header. Blank lines, lines matching
# +host_pattern+ anywhere, and rows whose last column contains "no-hit"
# are skipped.
#
# @param path [String] path to a *contamination_check_merged_table.csv file
# @param host_pattern [Regexp] pattern identifying rows that belong to the host
# @return [Hash{String => Array<String>}] last column => first-column values,
#   in file order
def contaminant_scaffolds(path, host_pattern)
  categories = {}
  File.foreach(path, chomp: true).with_index do |line, index|
    next if index.zero?
    next if line.strip.empty? || host_pattern.match?(line)

    # limit -1 keeps a trailing empty column instead of silently dropping it.
    columns = line.split(",", -1)
    category = columns.last
    next if category.include?("no-hit")

    (categories[category] ||= []) << columns.first
  end
  categories
end

# Defaults; both can be overridden on the command line.
phylum = "Arthropoda|insect"
dir = "20230226_qqAmaFero1.20230225.haplotigs.fa_asg_cobiont_check_run/collected_tables/"

parser = OptionParser.new do |opts|
  opts.banner = "Usage: filter_merged --phylum xyz --directory <dir>"
  opts.on("-p PHYLUM", "--phylum=PHYLUM",
          "Specifies the phylum(s) of the host separated by | [default=#{phylum}]") { |value| phylum = value }
  opts.on("-d DIR", "--directory=DIR",
          "merged ASG directory [default=#{dir}]") { |value| dir = value }
  opts.on("-h", "--help", "Show this help") do
    puts opts
    exit
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Report bad options with the usage text rather than a backtrace.
  warn "ERROR: #{e.message}"
  warn parser
  exit 1
end

# Compile the host pattern once instead of once per input line.
host_pattern = Regexp.new(phylum)

# Report 1: adaptor screen hits.
Dir.glob("#{dir}/*vecscreen_contamination").each do |file|
  hits = vecscreen_hits(file)
  next if hits.empty?

  puts "========== EUKARYOTE ADAPTOR SCREEN =========="
  puts hits
  puts ""
end

# Report 2: scaffolds to remove, grouped by classifier.
Dir.glob("#{dir}/*contamination_check_merged_table.csv").each do |file|
  contaminant_scaffolds(file, host_pattern).each do |category, scaffolds|
    puts "##{category}"
    scaffolds.each { |scaffold| puts "REMOVE\t#{scaffold}" }
    puts ""
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It skips all lines that match the host phyla and those with "no-hit" as the classifier, then pretty-prints the remaining scaffolds grouped by merged classifier.