Created
February 21, 2013 16:12
-
-
Save radaniba/5005818 to your computer and use it in GitHub Desktop.
A largely self-explanatory script: This will "shrink" an alignment, deleting all sites that don't contain a polymorphism in some member sequence. A little bit of script candy as well, this takes any number of files and saves the results in a new file named according to a definable schema. Actually, the whole thing is very over-engineered ...
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# Reduce a sequence to solely the SNP sites. | |
### IMPORTS | |
require 'test/unit/assertions' | |
require 'optparse' | |
require 'pp' | |
require 'ostruct' | |
require 'date' | |
require 'time' | |
require 'bio' | |
include Test::Unit::Assertions | |
### CONSTANTS & DEFINES | |
### IMPLEMENTATION | |
def interpolate(str, sub_hash) | |
return str.gsub(/\{([^}]+)\}/) { |m| | |
sub_hash[$1] | |
} | |
end | |
### MAIN | |
# Parse commandline arguments. | |
# | |
def parse_clargs(arg_arr) | |
clopts = { | |
:save => "{root}-snps.fasta", | |
:overwrite => false, | |
} | |
pargs = [] | |
OptionParser.new { |opts| | |
opts.program_name = __FILE__ | |
opts.banner = "Reduce a sequence to solely the SNP sites" | |
opts.separator("") | |
opts.separator("Usage: #{opts.program_name} [options] ALN1 ...]") | |
opts.on('-h', '--help', 'Display this screen') { | |
puts opts | |
exit | |
} | |
opts.on('', '--save STR', | |
"Name output files according this template") { |v| | |
clopts[:save] = v | |
} | |
opts.on('-o', '--overwrite', | |
"Overwrite pre-existing files") { | |
clopts[:overwrite] = true | |
} | |
begin | |
opts.parse!(arg_arr) | |
pargs = arg_arr | |
assert(1 <= pargs.length, "need files to work on") | |
rescue Exception => e | |
error_msg = e.to_str.split("\n") | |
print "Error: #{error_msg[0]}\n\n" | |
print opts | |
exit 1 | |
end | |
} | |
return clopts, pargs | |
end | |
# Main script functionality. | |
# | |
def main() | |
clopts, aln_files = parse_clargs(ARGV) | |
aln_files.each { |f| | |
# slurp ... | |
seqs = [] | |
Bio::FlatFile.open(f) { |rdr| | |
seqs = rdr.collect { |rec| | |
rec.to_seq() | |
} | |
} | |
# calculate diffs | |
diff_hash = {} | |
seqs.each { |s| | |
diff_hash[s.entry_id] = "" | |
} | |
seq_len = seqs[0].length() | |
(0...seq_len).each { |i| | |
is_diff = false | |
seqs.each { |s1| | |
seqs.each { |s2| | |
if s1 != s2 | |
if s1[i,1] != s2[i,1] | |
is_diff = true | |
break | |
end | |
end | |
} | |
if is_diff == true | |
break | |
end | |
} | |
if is_diff == true | |
seqs.each { |s| | |
diff_hash[s.entry_id] += s[i,1] | |
} | |
end | |
} | |
# write output | |
# make filename | |
ext = File.extname(f) | |
subs = { | |
"ext" => ext[1, ext.length], | |
"base" => File.basename(f), | |
"root" => File.basename(f, ext), | |
"date" => Date.today.to_s(), | |
"time" => Time.now.strftime(fmt='%T'), | |
"datetime" => DateTime.now.strftime(fmt='%F T%T'), | |
} | |
out_name = interpolate(clopts[:save], subs) | |
# do the writing | |
puts "Saving results to '#{out_name}' ..." | |
if File.exists?(out_name) | |
assert(clopts[:overwrite], "Can't overwrite existing file '#{out_name}'") | |
end | |
File.open(out_name, 'w') { |wrtr| | |
diff_hash.each_pair { |k,v| | |
wrtr << ">#{k}\n" | |
wrtr << "#{v}\n" | |
} | |
} | |
puts "Saved '#{out_name}'." | |
} | |
puts "== Finished." | |
end | |
if $0 == __FILE__ | |
main() | |
end | |
### END |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment