Skip to content

Instantly share code, notes, and snippets.

@thinkerbot
Created June 19, 2009 22:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thinkerbot/132915 to your computer and use it in GitHub Desktop.
Save thinkerbot/132915 to your computer and use it in GitHub Desktop.
20090619 Peptide Mass Calculation
require 'ms/in_silico/digester'
require 'ms/fasta/archive'
require 'molecules'
require 'tap'
# PeptideMasses::task
#
# % rap peptide_masses <fasta_file>
#
class PeptideMasses < Tap::Task
Polypeptide = Molecules::Libraries::Polypeptide
UnknownResidueError = Molecules::Libraries::Polypeptide::UnknownResidueError
def process(fasta_file)
output_dir = fasta_file.chomp(File.extname(fasta_file))
if File.exists?(output_dir)
raise "output directory already exists: #{output_dir.inspect}"
end
FileUtils.mkdir_p(output_dir)
Dir.chdir(output_dir)
digester = Ms::InSilico::Digester['Trypsin']
targets = []
unknowns = File.open('unknowns', "w")
start = Time.now
n = 0
m = 0
Ms::Fasta::Archive.open(fasta_file) do |arc|
arc.each_str do |entry|
header, *seq = entry.split(/\r?\n/)
i = 0
digester.digest(seq.join).each do |peptide|
begin
p = Polypeptide.new(peptide)
(targets[p.length] ||= Hash.new(0))[p.mass] += 1
rescue(UnknownResidueError)
unknowns.puts "- [#{m}, #{i}]"
end
i += 1
end
n += 1
m += 1
if n == 10000
log :split, "# #{m} of #{arc.length} (#{Time.now - start}s)"
puts "- split: #{Time.now - start}"
puts " index: #{m}"
dump(targets)
start = Time.now
n = 0
end
end
end
unknowns.close
end
def dump(targets)
start = Time.now
puts "- dump: "
targets.each_index do |index|
next unless masses = targets[index]
sum = 0
data = []
masses.each_pair do |mass, n|
data << mass
data << n
sum += n
end
puts " #{index}: [#{masses.length}, #{sum}]"
File.open(index.to_s, "a") do |io|
io << data.pack("G*")
end
end
targets.clear
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment