Created
January 15, 2014 03:41
-
-
Save jckantor/8430394 to your computer and use it in GitHub Desktop.
Calculate the molecular weight of a species given a chemical formula.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function mw = molweight(varargin) | |
% MOLWEIGHT Computes the molecular weights for a set of chemical species. | |
% | |
% SYNTAX | |
% | |
% mw = molweight(formula) | |
% mw = molweight(species) | |
% mw = molweigth(r) | |
% | |
% Computes the molecular weight of a chemical species. The species may be | |
% specified as a chemical formula, a cell array of chemical formulas or | |
% as a structure array of atomic representations. If there are no output | |
% arguments then a table of molecular weights is displayed. | |
% | |
% | |
% EXAMPLES | |
% | |
% 1. Molecular weight of methane | |
% | |
% mw = molweight('CH4') | |
% | |
% 2. Molecular weight of methane using a structure array | |
% | |
% r.C = 1; | |
% r.H = 4; | |
% molweight(r); | |
% | |
% 3. Molecular weights of set of compounds | |
% | |
% molweight({'CH4','O2','CO2','H2O'}); | |
% | |
% AUTHOR | |
% | |
% Jeff Kantor | |
% December 18, 2010 | |
% This file is part of larger project for developing Matlab based tools | |
% for undergraduate use in Chemical Engineering. The following data | |
% structure was adapted from that project. | |
persistent ppds; | |
if isempty(ppds) | |
element = @(str, atomicnumber, amu) amu; | |
ppds.M = element('any metal', 0, NaN); | |
ppds.X = element('any halogen', 0, NaN); | |
ppds.H = element('hydrogen atom', 1, 1.00794); | |
ppds.D = element('deuterium', 1, 2.01410178); | |
ppds.T = element('tritium', 1, 3.0160492); | |
ppds.He = element('helium', 2, 4.0026002); | |
ppds.Li = element('lithium', 3, 6.941); | |
ppds.Be = element('beryllium', 4, 9.012182); | |
ppds.B = element('boron', 5, 10.811); | |
ppds.C = element('carbon', 6, 12.011); | |
ppds.N = element('nitrogen atom', 7, 14.00674); | |
ppds.O = element('oxygen atom', 8, 15.9994); | |
ppds.F = element('flourine', 9, 18.99840); | |
ppds.Ne = element('neon', 10, 20.1797); | |
ppds.Na = element('sodium', 11, 22.989768); | |
ppds.Mg = element('magnesium', 12, 24.3050); | |
ppds.Al = element('aluminium', 13, 26.981539); | |
ppds.Si = element('silicon', 14, 28.0855); | |
ppds.P = element('phosphorus', 15, 30.97362); | |
ppds.S = element('sulfur', 16, 32.066); | |
ppds.Cl = element('chlorine atom', 17, 35.4527); | |
ppds.Ar = element('argon', 18, 39.948); | |
ppds.K = element('potassium', 19, 39.0983); | |
ppds.Ca = element('calcium', 20, 40.078); | |
ppds.Sc = element('scandium', 21, 44.95591); | |
ppds.Ti = element('titanium', 22, 47.88); | |
ppds.V = element('vanadium', 23, 50.9415); | |
ppds.Cr = element('chromium', 24, 51.9961); | |
ppds.Mn = element('manganese', 25, 54.93085); | |
ppds.Fe = element('iron', 26, 55.847); | |
ppds.Co = element('cobalt', 27, 58.9332); | |
ppds.Ni = element('nickel', 28, 58.69); | |
ppds.Cu = element('copper', 29, 63.546); | |
ppds.Zn = element('zinc', 30, 65.39); | |
ppds.Ga = element('gallium', 31, 69.723); | |
ppds.Ge = element('germanium', 32, 72.61); | |
ppds.As = element('arsenic', 33, 74.92159); | |
ppds.Se = element('selenium', 34, 78.96); | |
ppds.Br = element('bromine', 35, 79.904); | |
ppds.Kr = element('krypton', 36, 83.80); | |
ppds.Rb = element('rubidium', 37, 85.4678); | |
ppds.Sr = element('strontium', 38, 87.62); | |
ppds.Y = element('yttrium', 39, 88.90585); | |
ppds.Zr = element('zirconium', 40, 91.224); | |
ppds.Nb = element('niobium', 41, 92.90638); | |
ppds.Mo = element('molybdenum', 42, 95.94); | |
ppds.Tc = element('technetium', 43, 98); | |
ppds.Ru = element('ruthenium', 44, 101.070); | |
ppds.Rh = element('rhodium', 45, 102.9055); | |
ppds.Pd = element('palladium', 46, 106.42); | |
ppds.Ag = element('silver', 47, 107.8682); | |
ppds.Cd = element('cadmium', 48, 112.411); | |
ppds.In = element('indium', 49, 114.82); | |
ppds.Sn = element('tin', 50, 118.71); | |
ppds.Sb = element('antimony', 51, 121.75); | |
ppds.Te = element('tellurium', 52, 127.60); | |
ppds.I = element('iodine', 53, 126.90447); | |
ppds.Xe = element('xenon', 54, 131.29); | |
ppds.Cs = element('cesium', 55, 132.90543); | |
ppds.Ba = element('barium', 56, 137.327); | |
ppds.La = element('lanthanum', 57, 138.9055); | |
ppds.Ce = element('cerium', 58, 140.115); | |
ppds.Pr = element('praseodymium', 59, 140.90765); | |
ppds.Nd = element('neodymium', 60, 144.24); | |
ppds.Pm = element('promethium', 61, 145); | |
ppds.Sm = element('samarium', 62, 150.36); | |
ppds.Eu = element('europium', 63, 151.965); | |
ppds.Gd = element('gadolinium', 64, 157.25); | |
ppds.Tb = element('terbium', 65, 158.92534); | |
ppds.Dy = element('dysprosium', 66, 162.50); | |
ppds.Ho = element('holmium', 67, 164.92032); | |
ppds.Er = element('erbium', 68, 167.26); | |
ppds.Tm = element('thulium', 69, 168.93421); | |
ppds.Yb = element('ytterbium', 70, 173.04); | |
ppds.Lu = element('lutetium', 71, 174.967); | |
ppds.Hf = element('hafnium', 72, 178.49); | |
ppds.Ta = element('tantalum', 73, 180.9479); | |
ppds.W = element('tungsten', 74, 183.85); | |
ppds.Re = element('rhenium', 75, 186.207); | |
ppds.Os = element('osmium', 76, 190.2); | |
ppds.Ir = element('iridium', 77, 192.22); | |
ppds.Pt = element('platinum', 78, 195.09); | |
ppds.Au = element('gold', 79, 196.96654); | |
ppds.Hg = element('mercury', 80, 200.59); | |
ppds.Tl = element('thallium', 81, 204.3833); | |
ppds.Pb = element('lead', 82, 207.2); | |
ppds.Bi = element('bismuth', 83, 208.98037); | |
ppds.Po = element('polonium', 84, 209); | |
ppds.At = element('astatine', 85, 210); | |
ppds.Rn = element('radon', 86, 222); | |
ppds.Fr = element('francium', 87, 223); | |
ppds.Ra = element('radium', 88, 226.025); | |
ppds.Ac = element('actinium', 89, 227.028); | |
ppds.Th = element('thorium', 90, 232.0381); | |
ppds.Pa = element('protactinium', 91, 231.03588); | |
ppds.U = element('uranium', 92, 238.0289); | |
ppds.Np = element('neptunium', 93, 237.0482); | |
ppds.Pu = element('plutonium', 94, 244); | |
ppds.Am = element('americium', 95, 243); | |
ppds.Cm = element('curium', 96, 247); | |
ppds.Bk = element('berkelium', 97, 247); | |
ppds.Cf = element('californium', 98, 251); | |
ppds.Es = element('einsteinium', 99, 252); | |
ppds.Fm = element('fermium', 100, 257); | |
ppds.Q = element('charge', 0, 0); | |
ppds.e = element('electron', 0, 0); | |
end | |
assert(nargin > 0, 'molweight:input', ['No input. Expects a cell ', ... | |
'array of formulas or struct array of atoms.']); | |
assert(nargin < 2, 'molweight:input', 'Unexpected extra inputs.'); | |
% Process function argument to produce a cell array of chemical | |
% formulas and structure array of atomic representations. | |
switch class(varargin{1}) | |
case 'char' % Single formula | |
species = varargin; | |
r = parse_formula(species); | |
case 'cell' % Cell array of formulas | |
species = varargin{1}; | |
r = parse_formula(species); | |
case 'struct' % Structure array | |
r = varargin{1}; | |
species = hillformula(r); | |
otherwise | |
error('molweight:input',['requires cell array of chemical ',... | |
'formulas or a structure array of atomic representations']); | |
end | |
% For each element of the structure array, compute a molecular weight | |
mw = zeros(size(r)); | |
atoms = fields(r); | |
for n = 1:size(r(:)) | |
for i = 1:length(atoms) | |
% The following check is needed to avoid add adding NaN in | |
% cases where M or X appear in atoms. | |
if r(n).(atoms{i}) > 0 | |
mw(n) = mw(n) + ppds.(atoms{i})*r(n).(atoms{i}); | |
end | |
end | |
end | |
% If no outputs, then display results | |
if nargout == 0 | |
fprintf('\n'); | |
fprintf('%-25s %8s\n','Species','Mol. Wt.'); | |
fprintf('%-25s %8s\n','-------','--------'); | |
for n = 1:size(mw(:)) | |
fprintf('%-25s %8.2f\n',species{n},mw(n)); | |
end | |
end | |
end | |
function r = parse_formula(varargin) | |
% PARSE_FORMULA Parses a chemical formula to form an atomic representation. | |
% | |
% SYNTAX | |
% | |
% r = parse_formula(str) | |
% r = parse_formula({str1,str2,str3, ...}) | |
% | |
% Parses chemical formulas and returns a structure array holding the an | |
% atomic representation of the chemical formulae. The input is a string | |
% or a cell array of strings. | |
% | |
% | |
% EXAMPLES | |
% | |
% 1. Chemical formulas of varying complexity | |
% | |
% parse_formula('H2O'); % Water | |
% parse_formula('NaHCO3'); % Sodium Bicarbonate | |
% parse_formula('(CH4)8(H2O)46'); % Methane Clathrate | |
% parse_formula('CH3COOCH2CH3'); % Ethyl Acetate | |
% parse_formula('MnO4-'); % Negative Charge Ion | |
% | |
% parse_formula('dCH4'); % Returns error message | |
% | |
% 2. Create an structure array of atomic representations for a set of | |
% compounds | |
% | |
% r = parse_formula({'CH4','O2','CO2','H2O'}); | |
% | |
% | |
% USAGE NOTES | |
% | |
% 1. Formulas are made of up of sequences of elements followed by | |
% integers indicating the number of included atoms. Omitted integers | |
% are assumed to be one. | |
% | |
% 2. Elements are the conventional one or two character abbreviations. | |
% The character is captialized. If present, the second character is | |
% lower case. In addition to the standard elements, the parser allows | |
% for | |
% | |
% Symbol Entity Interpretation | |
% e electron like an element with MW = 0 | |
% D deuterium an element | |
% T tritium an element | |
% M any metal like an element, mw = NaN | |
% X any halogen like an element, mw = NaN | |
% Me methyl group (CH3) CH3 substituted for Me | |
% Et ethyl group (C2H5) C2H5 substituted for Et | |
% Bu butyl group (C4H9) C4H9 substituted for Bu | |
% Ph phenol group (C6H5) C6H5 substituted for Ph | |
% | |
% 3. Subgroups may be included between parenthesis or brackets followed | |
% by an integer indicating number of repetitions. Two levels of | |
% subgrouping are allowed. | |
% | |
% 4. A terminal lower case suffix denoting phases will be correctly | |
% parsed. The phase must be one of (aq), (l), (g), or (s). | |
% | |
% 5. The charge on an ionic species is appended as a + or - followed by | |
% an optional integer. Examples are H+, OH-, or Ca+2. | |
% | |
% 6. The bare electron e- is used in balancing chemical half reactions. | |
% | |
% 7. Error messages are generated for invalid fomulas | |
% | |
% 8. str can be a cell array of chemical formula. The results is a | |
% structure array. The elements of the output structure array are in | |
% one-to-one correspondence with elements of the cell array. For | |
% example | |
% | |
% r = parse_formula({'CH4','CH3OH','CHOOH'}) | |
% | |
% r(1) holds the atomic formula for CH4, r(2) for CH3OH, and r(3) for | |
% CHOOH. | |
% AUTHOR | |
% | |
% Jeff Kantor | |
% December 18, 2010 | |
assert(nargin > 0, 'parse_formula:input', ['No input. Expects a ', ... | |
'string or cell array of chemical formulas.']); | |
assert(nargin < 2, 'stoich:input', 'Unexpected extra inputs.'); | |
switch class(varargin{1}) | |
case 'char' % Single formula | |
str = varargin; | |
case 'cell' % Cell array of formulas | |
str = varargin{1}; | |
otherwise | |
error('parse_formula:input',['requires cell array of ',... | |
'chemical formulas.']); | |
end | |
assert(iscellstr(str), 'parse_formula:input', ... | |
'Formulas must be strings.'); | |
% Trim any whitespace at front or back | |
str = strtrim(str); | |
% Remove phase information. This information is currently neglected. In | |
% a later version we may wish to incorporate phase into a more complete | |
% data structure for representing chemical formula. | |
prex = '|\((aq|g|l|s)\)$'; | |
str = regexprep(str,prex,''); | |
% Substitute for some common chemical abbreviations | |
str = regexprep(str,'Bu','C4H9'); % Butyl | |
str = regexprep(str,'Et','C2H5'); % Ethyl | |
str = regexprep(str,'Me','CH3'); % Methyl | |
str = regexprep(str,'Ph','C6H5'); % Phenol | |
% Apply the main parser to every element of str | |
q = cellfun(@(s)parse_formula_(s,3),str,'Uniform',false); | |
% Union of all atomic species | |
atoms = {}; | |
for i = 1:length(q(:)) | |
atoms = union(atoms, fields(q{i})); | |
end | |
% Add all atomic species to all structures. | |
for i = 1:length(q(:)) | |
for j = 1:length(atoms) | |
if ~ismember(atoms{j},fields(q{i})) | |
q{i}.(atoms{j}) = 0; | |
end | |
end | |
end | |
% Form the structure array to have the same shape as str | |
r = reshape([q{:}],size(str)); | |
end % parse_formula | |
function r = parse_formula_(str,kdepth) | |
assert(kdepth > 0, 'parse_formula_:Recursion', ... | |
'Reached maximum recursion depth'); | |
r = struct([]); | |
% Regular expression returning tokens for element and number | |
% sexpr matches single elements followed by a digit, or a +/- | |
% followed by a digit to denote charge | |
persistent srex; % Regexp pattern to match elements and charges | |
persistent grex; % Regexp pattern to match groups | |
if isempty(srex) || isempty(grex) | |
srex = ['(A[lrsgutcm]|B[eraik]?|C[laroudsemf]?|D[y]?|E[urs]|', ... | |
'F[erm]?|G[aed]|H[eofgas]?|I[nr]?|Kr?|L[iaur]|', ... | |
'M[gnodt]?|N[eaibdpos]?|Os?|P[drmtboau]?|R[buhenaf]|', ... | |
'S[icernbmg]?|T[icebmalh]?|U|V|W|X[e]?|Yb?|Z[nr])', ... | |
'(\d*\.\d+|\d*)', ... | |
'|(e|+|-)(\d*)']; | |
grex = '|\(([^\)]*)\)(\d*\.\d+|\d*)|\[([^\]]*)\](\d*\.\d+|\d*)'; | |
end | |
% Parse formula for chemical groups. This picks out anything that looks | |
% an element followed by a number, or a subgroup within parentheses. | |
% The tokens are returned in the cell array u. Each u{k} has two | |
% elements, the first is a string denoting the group, and the second is | |
% number string of repetitions. | |
[u,s,e] = regexp(str,[srex,grex],'tokens','start','end'); | |
% Report any parsing errors. A parse error occurs if there are any | |
% characters not matched as tokens. We scan the start and end positions | |
% of the tokens to determine if there are any gaps. | |
g(1:length(str)) = '^'; | |
for i = 1:length(s); | |
g(s(i):e(i)) = ' '; | |
end | |
assert(all(g ~= '^'), 'parse_formula:ParseError', ... | |
'Could not parse formula:\n %s\n %s\n', str, char(g)); | |
% Extract atom tokens from the first part of each token | |
tok = cellfun(@(v)v{1},u,'Uni',false); | |
% Extract counts from the second part of each token, convert to | |
% doubles, empty counts set to 1 | |
cnt = cellfun(@(v)v{2},u,'Uni',false); | |
cnt = str2double(cnt); | |
cnt(isnan(cnt)) = 1; | |
% Loop over tokens | |
for j = 1:length(u) | |
% See if token matches an element | |
if strcmp(tok{j},regexp(tok{j},srex,'match')) | |
% The token exactly matches an element. | |
% Change + or - tokens to 'Q'. | |
tok{j} = regexprep(tok{j},'+','Q'); | |
if strcmp(tok{j}, '-') | |
tok{j} = 'Q'; | |
cnt(j) = -cnt(j); | |
end | |
% Update atomic representation, adding a field if needed. | |
if isfield(r,tok{j}) | |
r.(tok{j}) = r.(tok{j}) + cnt(j); | |
else | |
r(1).(tok{j}) = cnt(j); | |
end | |
else | |
% The token must be a group, so do a recursion to find | |
% an atomic represenation of the group. | |
q = parse_formula_(tok{j},kdepth-1); | |
% Updatethe atomic representation to include the group. | |
% Add fields if needed. Multiply by number of groups in the | |
% formula we're parsing. | |
f = fields(q); | |
for k = 1:length(f) | |
if isfield(r,f{k}) | |
r.(f{k}) = r.(f{k}) + cnt(j)*q.(f{k}); | |
else | |
r(1).(f{k}) = cnt(j)*q.(f{k}); | |
end | |
end | |
end | |
end | |
end % parse_formula_ | |
function species = hillformula(varargin) | |
% | |
% HILLFORMULA Produce a cell array of chemical formulas in Hill Notation. | |
% | |
% species = hillformula(r) | |
% species = hillformula(species) | |
% | |
% Construct a cell array of chemical formula strings in Hill notation. | |
% The input is either a cell array of chemical formulas or a structure | |
% array of atomic representations. | |
% | |
% | |
% EXAMPLES | |
% | |
% 1. Construct formula for methane | |
% | |
% r.C = 1; | |
% r.H = 4; | |
% hillformula(r); | |
% | |
% 2. Roundtrip construction for methanol | |
% | |
% r = parse_formula('CH3OH'); | |
% hillformula(r) | |
% | |
% 3. Convert formula to Hill notation | |
% | |
% sp = hillformula('H2SO4'); | |
% | |
% | |
% USAGE NOTES | |
% | |
% 1. Starting with a chemical formula s1, | |
% | |
% s2 = hillformula(s1) | |
% | |
% projects the chemical onto a simpler representation s2. This is not | |
% unique, there may be multiple formulas (isomers) that result in the | |
% same string s2. | |
% | |
% 2. Starting with an atomic representation r1, the combination | |
% | |
% s = hillformula(r1) | |
% r2 = parse_formula(s) | |
% | |
% will return the same atomic representation. | |
% Author | |
% | |
% Jeff Kantor | |
% December 18, 2010 | |
assert(nargin > 0, 'hillfomula:input', ['No input. Expects a ', ... | |
'cell array of formulas or struct array of atoms.']); | |
assert(nargin < 2, 'hillfomula:input', 'Unexpected extra inputs.'); | |
% Process function argument to produce a cell array of chemical | |
% formulas and structure array of atomic representations. | |
switch class(varargin{1}) | |
case 'char' % Single formula | |
r = parse_formula(varargin); | |
case 'cell' % Cell array of formulas | |
r = parse_formula(varargin{1}); | |
case 'struct' % Structure array | |
r = varargin{1}; | |
otherwise | |
error('hillfomula:input',['requires cell array of chemical ',... | |
'formulas or a structure array of atomic representations']); | |
end | |
% Use hillformula_ to perform calculations for each element of array r | |
species = arrayfun(@hillformula_,r,'UniformOutput',false); | |
end | |
function str = hillformula_(r) | |
% Put fields in alphabetical order | |
atoms = sort(fields(r)); | |
% The fields of r is union set of atoms present in the various chemical | |
% species. We obtain these fields and put them in Hill order. | |
if ismember('C',atoms) && (r.C > 0) | |
a = intersect({'C','H','D','T'},atoms); | |
b = setdiff(atoms,{'C','H','D','T'}); | |
atoms = [a(:); b(:)]; | |
end | |
% If Q is present, then Q always goes to the end of the line | |
if ismember('Q',atoms) && (r.Q ~= 0) | |
a = setdiff(atoms,{'Q'}); | |
atoms = [a(:); 'Q']; | |
end | |
str = ''; | |
for k = 1:length(atoms); | |
switch atoms{k} | |
case 'Q' | |
if r.(atoms{k}) <= -2 | |
str = strcat(str,num2str(r.(atoms{k}))); | |
elseif r.(atoms{k}) == -1 | |
str = strcat(str,'-'); | |
elseif r.(atoms{k}) == 1 | |
str = strcat(str,'+'); | |
elseif r.(atoms{k}) >= 2 | |
str = strcat(str,'+',num2str(r.(atoms{k}))); | |
end | |
otherwise | |
if (r.(atoms{k}) > 0) && (r.(atoms{k}) ~= 1) | |
str = strcat(str,atoms{k},num2str(r.(atoms{k}))); | |
elseif r.(atoms{k}) == 1 | |
str = strcat(str,atoms{k}); | |
end | |
end | |
end | |
end | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment