Skip to content

Instantly share code, notes, and snippets.

@yagays
Created October 11, 2012 06:43
Show Gist options
  • Save yagays/3870629 to your computer and use it in GitHub Desktop.
Save yagays/3870629 to your computer and use it in GitHub Desktop.
SOAPdenovo2のscafStatisticsの統計情報を集計してtsv形式で出力するスクリプト
#!/usr/bin/env ruby
require "optparse"
# Converts one raw statistics cell into a Ruby value.
#
# s - the cell text (String).
#
# Returns s itself when it is exactly "NaN"; a Float when the text contains a
# percent sign (all "%" characters are removed before conversion); otherwise
# the Integer produced by String#to_i.
def v(s)
  return s if s == "NaN"
  return s.delete("%").to_f if s.include?("%")

  s.to_i
end
# Parses one scafStatistics file into two parallel lists: column names and
# the corresponding values.
#
# file - path of the statistics file to read.
#
# Lines containing "<--" and blank lines are skipped. Every other line is
# split on tabs and contributes one or two columns:
#
# * first field "GC_Content"       -> one column, value from the second field
# * three fields, last contains "%" -> "<name>" and "<name> (%)" (name stripped)
# * any other three-field line      -> "<name>" and "Contigs >0 in <name>"
# * first field "Average_number_of_contigs_per_scaffold"
#                                   -> one column, second field as Float
# * anything else                   -> one column, second field converted by v
#
# Returns [header, stats]. Both arrays start with the "Filename" column, whose
# value is the basename of file.
def parse_scafstatistics(file)
  header = ["Filename"]
  stats = [File.basename(file)]

  # File.open always treats the argument as a path. Kernel#open may instead
  # run a command when the string starts with "|", which is unsafe for
  # arguments taken from the command line.
  File.open(file) do |f|
    f.each_line do |line|
      # strip-based check also skips CRLF and whitespace-only blank lines,
      # which would otherwise yield a row without a value field.
      next if line.include?("<--") || line.strip.empty?

      fields = line.chomp.split("\t")
      name = fields[0]

      if name == "GC_Content"
        # Checked before the three-field case so only the second field is used.
        header << name
        stats << v(fields[1])
      elsif fields.length == 3
        if fields[2].include?("%")
          label = name.strip
          header << label << "#{label} (%)"
        else
          header << name << "Contigs >0 in #{name}"
        end
        stats << v(fields[1]) << v(fields[2])
      elsif name == "Average_number_of_contigs_per_scaffold"
        # Kept as a Float; v would truncate it to an Integer.
        header << name
        stats << fields[1].to_f
      else
        header << name
        stats << v(fields[1])
      end
    end
  end

  [header, stats]
end
# Picks the cells of one row (header or data) that the chosen mode prints.
#
# The positions are hard-coded:
#   print_scaffold and print_all  -> the first 58 cells
#   print_scaffold only           -> the first 9 cells plus cell 45
#   print_all only                -> every cell from position 58 on
#   neither                       -> cells 58..65 plus cell 98
# NOTE(review): presumably these positions match the column layout produced
# for a complete scafStatistics file -- confirm against real input.
#
# first/drop are used instead of range slices because they return an empty
# array (not nil) when the row is shorter than the requested start position.
def tsv_columns(row, print_all, print_scaffold)
  if print_scaffold
    print_all ? row.first(58) : [row.first(9), row[45]]
  elsif print_all
    row.drop(58)
  else
    [row.drop(58).first(8), row[98]]
  end
end

# Writes the selected columns to standard output as tab-separated text: one
# header line followed by one line per entry of stats.
#
# header         - Array of column names.
# stats          - Array of rows, each an Array of values parallel to header.
# print_all      - selects the wide column set instead of the short one.
# print_scaffold - true selects columns before position 58, false selects
#                  columns from position 58 on.
#
# Rows shorter than the selected positions produce empty cells rather than
# raising.
def print_tsv(header, stats, print_all, print_scaffold)
  # Array#join flattens the nested [slice, cell] pairs while joining.
  puts tsv_columns(header, print_all, print_scaffold).join("\t")
  stats.each do |row|
    puts tsv_columns(row, print_all, print_scaffold).join("\t")
  end
end
# Command-line entry point. Options:
#   -a, --all       print the wide column set
#   -s, --sort VAL  sort rows by the column named VAL, largest value first
#   -c, --contig    print the columns from position 58 on instead of the
#                   leading ones
#   -h, --help      print the option summary and exit
# Remaining arguments are glob patterns naming the statistics files to read.
if __FILE__ == $PROGRAM_NAME
  header = []
  stats = []
  sort_column = nil
  print_all = false
  print_scaffold = true

  ARGV.options do |opt|
    opt.on("-a", "--all") { print_all = true }
    opt.on("-s VAL", "--sort") { |a| sort_column = a }
    opt.on("-c", "--contig") { print_scaffold = false }
    opt.on("-h", "--help") { puts opt; exit }
    opt.parse!
  end

  Dir.glob(ARGV).each do |f|
    h, s = parse_scafstatistics(f)
    # The header of the last file read is used for every row.
    header = h
    stats << s
  end

  if sort_column
    i = header.index(sort_column)
    if i.nil?
      puts "ERROR : unknown column name '#{sort_column}' "
      exit 1
    end

    # A column can mix numbers with "NaN" strings; comparing those directly
    # makes sort raise ArgumentError. Numeric rows are ordered first (largest
    # first), followed by the remaining rows in descending string order.
    numeric, other = stats.partition { |row| row[i].is_a?(Numeric) }
    stats = numeric.sort { |a, b| b[i] <=> a[i] } +
            other.sort { |a, b| b[i].to_s <=> a[i].to_s }
  end

  print_tsv(header, stats, print_all, print_scaffold)
end
@yagays
Copy link
Author

yagays commented Oct 11, 2012

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment