# Calculation of Mascot Peptide Mass
# These are a number of tasks that calculate peptide masses
# using a variety of algorithms. The intent is to find
# the algorithm used by Mascot to calculate peptide mass.
#
# Correct algorithm: unknown
# Dependencies: tap, molecules
#
require 'rubygems' | |
require 'tap' | |
require 'molecules' | |
# UnimodMass::manifest calculates pepmass from unimod masses
#
# Calculates the mass of a peptide by applying the
# {Unimod masses}[http://www.unimod.org/masses.html]
# directly to the molecular formula of each
# input sequence.
#
class UnimodMass < Tap::Task
  Residue = Molecules::Libraries::Residue

  # Per-element masses that molecule_mass multiplies by the element counts.
  C = 12
  H = 1.007825035
  N = 14.003074
  O = 15.99491463
  S = 31.9720707

  config(:nterm, "H") { |str| Molecules::EmpiricalFormula.parse(str) } # the n-term formula
  config(:cterm, "OH") { |str| Molecules::EmpiricalFormula.parse(str) } # the c-term formula

  # Builds the EmpiricalFormula of a peptide sequence by adding together the
  # residue formula looked up (by byte) for every byte of seq. An empty
  # sequence yields a fresh, empty EmpiricalFormula.
  def formula(seq)
    seq.each_byte.inject(Molecules::EmpiricalFormula.new) do |total, byte|
      total + Residue.residue_index[byte]
    end
  end

  # Computes the mass of an EmpiricalFormula from its C, H, N, O and S
  # counts only; any further entries of the formula array are ignored.
  #
  # NOTE(review): relies on the first five entries of
  # EmpiricalFormula#formula being the H, O, C, N, S counts, in that
  # order -- confirm against the molecules library.
  def molecule_mass(emperical_formula)
    hydrogen, oxygen, carbon, nitrogen, sulfur = emperical_formula.formula

    # Terms are listed (and folded left to right) as C, H, N, O, S so the
    # floating point result does not depend on anything but the counts.
    weighted = [
      [carbon, C],
      [hydrogen, H],
      [nitrogen, N],
      [oxygen, O],
      [sulfur, S]
    ]

    # a missing (nil) count is treated as zero
    weighted.map { |count, unit_mass| (count || 0) * unit_mass }.
             inject { |sum, term| sum + term }
  end

  # Calculates one mass per input sequence: the n-term and c-term masses
  # plus the mass of the sequence formula. Each mass is passed to log and
  # the masses are returned as an array, in input order.
  def process(*sequences)
    termini_mass = molecule_mass(nterm) + molecule_mass(cterm)

    sequences.map do |seq|
      peptide_mass = termini_mass + molecule_mass(formula(seq))
      log(peptide_mass, seq)
      peptide_mass
    end
  end

  # Prints the mass on standard output; seq is accepted but not used.
  def log(mass, seq)
    $stdout.puts(mass)
  end

  protected

  # utility to truncate a mass: scales n by 10**precision, drops the
  # fractional part, and scales back
  def trunc(n, precision) # :nodoc:
    scale = 10**precision.to_i
    whole = (n * scale).truncate
    whole.to_f / scale
  end

  # utility to round a mass (delegates to Molecules::Utils.round)
  def round(n, precision) # :nodoc:
    Molecules::Utils.round(n, precision)
  end
end
# TruncResidueMass::manifest calculates pepmass from truncated residue masses
#
# Calculates the mass of a peptide by calculating the mass of each
# residue using the {Unimod masses}[http://www.unimod.org/masses.html],
# truncating each residue mass, and then summing.
#
class TruncResidueMass < UnimodMass
  # Calculates one mass per input sequence. The n-term mass, the c-term
  # mass and the mass of every residue (looked up per byte of the
  # sequence) are each truncated to 6 decimal places before being added.
  # Each mass is passed to log; the masses are returned in input order.
  def process(*sequences)
    termini_mass = trunc(molecule_mass(nterm), 6) + trunc(molecule_mass(cterm), 6)

    sequences.map do |seq|
      # fold left to right over the bytes, starting from the termini mass
      peptide_mass = seq.each_byte.inject(termini_mass) do |sum, byte|
        sum + trunc(molecule_mass(Residue.residue_index[byte]), 6)
      end

      log(peptide_mass, seq)
      peptide_mass
    end
  end
end
# TruncNResidueMass::manifest calculates pepmass from truncated n * residue masses
#
# Calculates the mass of a peptide by multiplying each residue mass
# (calculated from the {Unimod masses}[http://www.unimod.org/masses.html])
# by the number of times the residue occurs, then truncating and summing.
#
# (This algorithm distinguishes between isobaric residues)
#
class TruncNResidueMass < UnimodMass
  # Calculates one mass per input sequence. Residues are tallied per byte
  # of the sequence; for every distinct byte the residue mass is multiplied
  # by its number of occurrences and the product is truncated to 6 decimal
  # places before being added to the (individually truncated) termini
  # masses. Each mass is passed to log; masses are returned in input order.
  def process(*sequences)
    termini_mass = trunc(molecule_mass(nterm), 6) + trunc(molecule_mass(cterm), 6)

    sequences.map do |seq|
      # byte => number of occurrences in seq
      occurrences = seq.each_byte.inject(Hash.new(0)) do |tally, byte|
        tally[byte] += 1
        tally
      end

      peptide_mass = occurrences.inject(termini_mass) do |sum, (byte, n)|
        sum + trunc(molecule_mass(Residue.residue_index[byte]) * n, 6)
      end

      log(peptide_mass, seq)
      peptide_mass
    end
  end
end
# RoundNResidueMass::manifest calculates pepmass from rounded n * residue masses
#
# Calculates the mass of a peptide by multiplying each residue mass
# (calculated from the {Unimod masses}[http://www.unimod.org/masses.html])
# by the number of times the residue occurs, then rounding and summing.
#
# (This algorithm distinguishes between isobaric residues)
#
class RoundNResidueMass < UnimodMass
  # Calculates one mass per input sequence. Residues are tallied per byte
  # of the sequence; for every distinct byte the residue mass is multiplied
  # by its number of occurrences and the product is rounded to 6 decimal
  # places before being added to the (individually rounded) termini
  # masses. Each mass is passed to log; masses are returned in input order.
  def process(*sequences)
    termini_mass = round(molecule_mass(nterm), 6) + round(molecule_mass(cterm), 6)

    sequences.map do |seq|
      # byte => number of occurrences in seq
      occurrences = seq.each_byte.inject(Hash.new(0)) do |tally, byte|
        tally[byte] += 1
        tally
      end

      peptide_mass = occurrences.inject(termini_mass) do |sum, (byte, n)|
        sum + round(molecule_mass(Residue.residue_index[byte]) * n, 6)
      end

      log(peptide_mass, seq)
      peptide_mass
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment