Skip to content

Instantly share code, notes, and snippets.

@kkmehta
Last active August 29, 2015 14:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kkmehta/0a1dbe272d7ff2aed011 to your computer and use it in GitHub Desktop.
Save kkmehta/0a1dbe272d7ff2aed011 to your computer and use it in GitHub Desktop.
stanford-code
function [newRxns synModelEX] = addEcRxnToSyn(ecModel, synModel, DICT, ID, ...
lb, ub, transYN, verbYN)
% Function: addEcRxnToSyn
% Syntax: addEcRxnToSyn(ecModel, synModel, DICT, rxnID, lb, ub, transportYN, ...
% verboseYN);
% ------------------------------------------------------------------------
% Adds the reaction specified by rxnID from the given E. coli model to the
% given synechocystis model. If transportYN is 1, exchange reactions will
% be added for all metabolites not already in the synechocystis model. If
% verboseYN is 1, assorted messages/warnings will be printed out as needed.
% DICT should be a dictionary translating E. coli metabolites to equivalent
% Synechocystis metabolites. lb and ub are lower and upper bounds for new
% reactions.
%
% returns [newRxns synModelEX], where newRxns is a list of new reactions
% addded, and synModelEX, the extended synechocystis model.
rxn = full(ecModel.S(:,ID));
name = ecModel.rxns{ID};
rEinds = find(rxn < 0); % indices of reactants
rEcoef = rxn(rEinds); % stoichiometric coefficients of reactants
rEname = ecModel.mets(rEinds); % metabolite names of reactants (cell array)
pEinds = find(rxn > 0); % indices of products
pEcoef = rxn(pEinds); % stoichiometric coefficients of products
pEname = ecModel.mets(pEinds); % metabolite names of products (cell array)
rSname = cell(length(rEinds),1);
pSname = cell(length(pEinds),1);
newRxns = {};
synModelEX = synModel;
% Check if a reaction with the same name already exists in syn, and
% if so skip it:
if ~isempty(find(ismember(synModel.rxns, name), 1))
if verbYN
disp('A reaction with the same name (%s) already exists in Syn', name);
end
return
end
% Convert E. coli metabolites to Syn by looking them up in the
% dictionary:
for j = 1:length(rEname)
if ~isKey(DICT, rEname{j})
rSname{j} = rEname{j};
% ...and add an import reaction from oblivion directly to
% the appropriate compartment, if it hasn't already been
% added:
if transYN
nameEX = sprintf('EX-IMP_%s', rEname{j});
if isempty(find(ismember(newRxns, nameEX), 1))
newRxns{length(newRxns)+1, 1} = nameEX;
metsEX = rEname{j};
stocEX = 1;
synModelEX = addReactionTacit(synModelEX, nameEX, {metsEX}, ...
stocEX, 1, lb, ub, 0);
end
end
else
rSname{j} = DICT(rEname{j});
end
end
for j = 1:length(pEname)
if ~isKey(DICT, pEname{j})
pSname{j} = pEname{j};
% ...and add an export reaction from the appropriate
% compartment directly to oblivion, if it hasn't already
% been added:
if transYN
nameEX = sprintf('EX-EXP_%s', pEname{j});
if isempty(find(ismember(newRxns, nameEX), 1))
newRxns{length(newRxns)+1, 1} = nameEX;
metsEX = pEname{j};
stocEX = -1;
synModelEX = addReactionTacit(synModelEX, nameEX, {metsEX}, ...
stocEX, 1, lb, ub, 0);
end
end
else
pSname{j} = DICT(pEname{j});
end
end
% Stoichiometry is the same:
rScoef = rEcoef;
pScoef = pEcoef;
mets = [rSname; pSname];
stoc = [rScoef; pScoef];
lbEC = ecModel.lb(ID);
ubEC = ecModel.ub(ID);
synModelEX = addReactionTacit(synModelEX, name, mets, stoc, 1, lbEC, ubEC, 0);
newRxns{length(newRxns)+1, 1} = name;
end
function TmList = checkMisprime(primer, target, cDNA, T)
%
% function: checkMisprime
% syntax: checkMisprime(primer, target, cDNA, T)
% Kunal Mehta (kkmeht @ gmail), 2013-01-28
% ----------------------------------------------
% checkMisprime looks for possible mispriming of 'primer' against 'target'.
% The output is a plot of the melting temperature of primer against an
% equal-sized window on target, stepping along the entire length of target.
% The Tm is calculated using calc_misprime_delG_KM.m, courtesy of Chris
% VanLang (vanlang @ stanford) and edited by me.
%
% cDNA (the DNA concentration in M) and T (the folding temperature in ∞C)
% are optional; their defaults are 0.5 µM (the default concentration for
% Thermo Scientific's Tm calculator) and 37 ∞C (the default temperature in
% calc_misprime_delG.m.
%
% primer and target can be explicit sequences or names of files in FASTA
% format. Most extraneous characters will be ignored in sequences, but
% do not use a full-stop (.) in sequences entered explicitly.
%
if ~exist('cDNA', 'var'); cDNA = 5E-7; end;
if ~exist('T', 'var'); T = 37; end;
% check if primer and target are explicit or filenames:
if ~isempty(strfind(primer, '.'))
f = fopen(primer);
primerStr = '';
while 1
line = fgetl(f);
if line == -1; break; end
primerStr = [primerStr line];
end
fclose(f);
primer = primerStr;
end
if ~isempty(strfind(target, '.'))
f = fopen(target);
targetStr = '';
while 1
line = fgetl(f);
if line == -1; break; end
targetStr = [targetStr line];
end
fclose(f);
target = targetStr;
end
% clean up 'primer' and 'target':
primer = cleanUp(primer);
primer = revComp(primer);
target = cleanUp(target);
Np = length(primer);
nBinds = length(target) - Np + 1; % number of binding sites along target
TmList = zeros(1, nBinds);
for i = 1:nBinds
subSeq = target(i:i+Np-1);
[~, Tm] = calc_misprime_delG_KM(primer, subSeq, cDNA, T, 0);
if Tm == -999999999; Tm = []; end;
TmList(i) = Tm;
end
plot(TmList)
xlabel('Position on template');
ylabel('Melting temperature');
end
function seqOut = cleanUp(seqIn)
BASES = 'ACGTUacgtu';
N = length(seqIn);
seqOut = blanks(N);
index = 1;
for i=1:N
if ~isempty(strfind(BASES, seqIn(i)))
seqOut(index) = seqIn(i);
index = index + 1;
end
end
while seqOut(end) == ' '
seqOut(end) = '';
end
end
function rc = revComp(seqIn)
BASE_PAIRS = containers.Map;
BASE_PAIRS('A') = 'T';
BASE_PAIRS('C') = 'G';
BASE_PAIRS('T') = 'A';
BASE_PAIRS('G') = 'C';
BASE_PAIRS('U') = 'A';
BASE_PAIRS('a') = 't';
BASE_PAIRS('c') = 'g';
BASE_PAIRS('t') = 'a';
BASE_PAIRS('g') = 'c';
BASE_PAIRS('u') = 'a';
N = length(seqIn);
rc = blanks(N);
for i=1:N
rc(i) = BASE_PAIRS(seqIn(abs(i-N-1)));
end
end
// Provide paths to the directories "input" (with the raw images), "crops" (for cropped photos) and "counts" (for photos with marks on each counted colony)
input = ""
crops = ""
counts = ""
list = getFileList(input);
for (i = 0; i < list.length; i++) {
open(input + list[i]);
run("Flip Vertically");
run("Flip Horizontally");
// These coordinates should describe a rectangle that just includes the plate
makeRectangle(555, 306, 2238, 1857);
run("Crop");
run("16-bit");
run("Subtract Background...", "rolling=50 light");
saveAs("Jpeg", crops + list[i]);
run("8-bit");
setAutoThreshold("Default");
// run("Threshold...");
// Adjust these limits so that colonies are included (highlighted in red) and everything else is excluded
setThreshold(25, 200);
setOption("BlackBackground", false);
run("Convert to Mask");
run("Make Binary");
run("Close-");
run("Analyze Particles...", "size=20-200 circularity=0.7-1.00 show=[Overlay Masks] display summarize in_situ");
saveAs("Jpeg", counts + list[i]);
close();
}
function [seqOut, dG] = optimizeForCFPS(seqIn, CDSyn, Fpyn, NdeIyn, MIN_ENERGY, verbyn)
%
% Function: optimizeForCFPS
% Syntax: [seqOut dG] = optimizeForCFPS(seqIn, CDSyn, Fpyn, ...
% MIN_ENERGY, verbyn)
% -------------------------------------------------------------
% Optimizes the input sequence assuming it is a 5' segment of an
% expression template for cell-free protein synthesis.
%
% The sequence should start at the 5' end of the mRNA transcript
% (omitting any terminal 5' hairpins or stem-loop structures),
% extend through the RBS and include the first few codons of the
% coding sequence.
%
% The program will search for the (most likely) ribosome binding
% site and start codon of the CDS. The RBS is not changed, and
% only silent mutations are made in the CDS. The optimal sequence
% is found using a Metropolis Monte-Carlo algorithm.
%
% If no changes to the coding sequence are desired, set CDSyn to
% 0, otherwise set it to 1. If no changes to the 5' region upstream
% of the CDS are desired, set Fpyn to 0, otherwise set it to 1. The
% program will continue until the maximum number of steps is reached
% (currently set at 1000 internally), or the energy of the optimized
% sequence is greater than MIN_ENERGY. MIN_ENERGY can be optionally
% set in the function call; the default is -0.5 kcal/mol
%
% (Updated 2014-04-23, Julie A Fogarty): If preservation of the NdeI
% site is desired, enter 1 for NdeIyn. If not, enter a 0.
%
% Set verbyn to 1 for verbose output; otherwise 0 (default 0).
%
% The Bioinformatics toolbox is required (I use rnafold to
% calculate the folding energy, and the geneticcode function).
%
% Kunal Mehta, kkmeht@icloud.com
% CONSTANTS
RBS = 'AGGAGGT';
BASES = 'ACGT';
START_CODON = 'ATG';
NdeI = 'CATATG';
MAX_STEPS = 2500;
RT = 0.6;
if ~exist('MIN_ENERGY','var') || isempty(MIN_ENERGY)
MIN_ENERGY = -0.5;
end
if ~exist('verbyn', 'var')
verbyn = 0;
end
% Should make sure we don't generate a new start codon anywhere
seqIn = cleanSeq(seqIn);
len = length(seqIn);
% Find the RBS:
locs = strfind(seqIn, RBS);
if isempty(locs)
fprintf('Could not find an RBS \n');
RBSLoc = [];
else
RBSLoc = locs(end):locs(end) + 6;
if length(locs) > 1
fprintf('Found more than one possible RBS, using the last one \n');
end
if verbyn; fprintf('Found RBS at position %4i \n', RBSLoc(1)); end
end
% Find the NdeI RE site:
locs = strfind(seqIn, NdeI);
if isempty(locs)
fprintf('Could not find an NdeI RE site \n');
NdeILoc = [];
else
NdeILoc = locs(end):locs(end) + 5;
if length(locs) > 1
fprintf('Found more than one possible NdeI RE site, using the last one \n');
end
if verbyn; fprintf('Found NdeI RE site at position %4i \n', NdeILoc(1)); end
end
% Find the start codon:
locs = strfind(seqIn, START_CODON);
if isempty(locs)
fprintf('Could not find a start codon \n');
ATGLoc = [];
else
ATGLoc = locs(1);
if length(locs) > 1
fprintf('Found more than one possible start codon, using the first one \n');
end
if verbyn; fprintf('Found start codon at position %4i \n', ATGLoc); end
end
% Calculate the initial folding energy:
[~, Uinit] = rnafold(seqIn);
if verbyn; fprintf('The initial folding energy is %6.2f kcal/mol \n', Uinit); end
% No point carrying on if structure is already good enough:
if Uinit >= MIN_ENERGY
seqOut = seqIn;
dG = Uinit;
return
end
terminator = 1;
nSteps = 0;
seqCurr = seqIn;
Ucurr = Uinit;
seqBest = '';
Ubest = -9999;
while(terminator)
seqTest = seqCurr;
% Make a random mutation:
if CDSyn && Fpyn; pos = randInt(0, len);
elseif Fpyn; pos = randInt(0, ATGLoc-1);
elseif CDSyn; pos = randInt(ATGLoc-1, len);
end
if ismember(pos, RBSLoc); continue; end
if ismember(pos, NdeILoc) && NdeIyn; continue; end
if pos >= ATGLoc
codonStart = floor((pos - ATGLoc) / 3) * 3 + ATGLoc;
codonRange = codonStart:codonStart + 2;
codon = seqCurr(codonRange);
newCodon = silentMutate(codon);
seqTest(codonRange) = newCodon;
if verbyn
fprintf('Changed codon %3s to %3s at position %3i \n', codon, newCodon, codonStart);
end
else
basesTemp = BASES;
basesTemp(strfind(BASES, seqCurr(pos))) = '';
seqTest(pos) = basesTemp(randInt(0, 3));
if verbyn
fprintf('Changed nucleotide at position %3i \n', pos);
end
end
% Apply the metropolis condition:
[~, Utest] = rnafold(seqTest);
if Utest > Ucurr || rand() < exp(-(Ucurr - Utest)/RT)
seqCurr = seqTest;
Ucurr = Utest;
if verbyn
fprintf(['Accepted new sequence ' seqCurr ' with energy ' num2str(Utest) '\n']);
end
if Ucurr > Ubest
seqBest = seqCurr;
Ubest = Ucurr;
end
end
% Check if we're done:
if nSteps > MAX_STEPS || Ucurr >= MIN_ENERGY
terminator = 0;
seqOut = seqBest;
dG = Ubest;
end
nSteps = nSteps + 1;
end
end
function seqOut = cleanSeq(seqIn)
BASES = 'ACGT';
seqOut = '';
seqIn = upper(seqIn);
for i = 1:length(seqIn)
if ismember(seqIn(i), BASES)
seqOut = [seqOut seqIn(i)];
end
end
end
function codonOut = silentMutate(codonIn)
% Constants:
TRANSLATION_TABLE = geneticcode(1);
CODON_TABLE = struct(...
'R', {{'CGC', 'CGT'}},...
'L', {{'CUG'}},...
'S', {{'AGC', 'TCG', 'AGT', 'TCC', 'TCT'}},...
'A', {{'GCG', 'GCC', 'GCA'}},...
'G', {{'GGC', 'GGT'}},...
'P', {{'CCG'}},...
'T', {{'ACC', 'ACG'}},...
'V', {{'GTG', 'GTT', 'GTC'}},...
'I', {{'ATT', 'ATC'}},...
'N', {{'AAC', 'AAT'}},...
'D', {{'GAT'}},...
'C', {{'TGC', 'TGT'}},...
'Q', {{'CAG'}},...
'E', {{'GAA'}},...
'H', {{'CAT', 'CAC'}},...
'K', {{'AAA'}},...
'F', {{'TTT', 'TTC'}},...
'Y', {{'TAT', 'TAC'}},...
'M', {{'ATG'}},...
'W', {{'TGG'}}...
);
AA = TRANSLATION_TABLE.(codonIn);
choices = CODON_TABLE.(AA);
if length(choices) == 1
codonOut = codonIn;
else
for i = 1:length(choices)
if char(choices{i}) == codonIn; choices(i) = ''; break; end;
end
codonOut = char(choices(randInt(0, length(choices))));
end
end
function rint = randInt(min, max)
rint = ceil(rand() * (max - min) + min);
end
%Dictionaries mapping E. coli metabolites to Syn metabolites:
DICT = DICT_exact;
BATCH_SIZE = 1; % # of reactions at a time
BOUND_L = -100; % default lower bound for new reactions
BOUND_U = 100; % default upper bound for new reactions
ecModel = EcAer;
synModel = synAn;
%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize:
rxnStart = 1;
rxnEnd = BATCH_SIZE;
nRxns = length(EcAer.rxns);
terminator = 0;
changeCobraSolver('glpk');
savedReactionInds = zeros(0, 2);
savedGrowthRates = zeros(0,1);
COUNTER = 1;
while true
% Check if we are at the end of the list of reactions, and set the
% terminator if we are:
if rxnEnd > nRxns
rxnEnd = nRxns;
terminator = 1;
end
synModelTest = synModel;
newRxns = {};
for i = rxnStart : rxnEnd
[addedRxns synModelTest] = addEcRxnToSyn(ecModel, synModelTest, DICT, ...
i, BOUND_L, BOUND_U, 0, 0);
newRxns = [newRxns; addedRxns];
end
% We now have the "fortified" synAn model. Check if it grows:
sol = optimizeCbModel(synModelTest);
if sol.f > 0.0001
dim = size(savedReactionInds);
savedReactionInds(dim(1)+1, :) = [rxnStart rxnEnd];
savedGrowthRates(length(savedGrowthRates)+1, 1) = sol.f;
end
% Check if we're done. If not, increment the reaction set:
if terminator
dim = size(savedReactionInds);
fprintf('\n %i sets of reactions that confer anaerobic growth \n', dim(1))
fprintf('%6s %5s %s \n', 'Start', 'End', 'f');
for j = 1:dim(1)
fprintf('%6i %5i %f \n', savedReactionInds(j,1), savedReactionInds(j,2), ...
savedGrowthRates(j));
end
break
else
rxnStart = rxnStart + BATCH_SIZE;
rxnEnd = rxnEnd + BATCH_SIZE;
end
end
function [DATA,nPlates,nLambdas] = smImport(path)
% [DATA,nPlates,nLambdas] = smImport(path)
%
% Imports the SoftMax Pro file located at <<path>> and stores the data in
% DATA, the number of plates as nPlates, and the number of wavelengths as
% nLambdas.
%
% For now, the function requires that the file be exported from SoftMax in
% "plate" format, opened in Excel and saved as an .xls file.
%
% DATA is a m x n x o x p tensor where
%
% m = 8, the number of rows in a 96-well plate
% n = 12, the number of columns in a 96-well plate
% o = the number of plates (blocks) in the file
% p = the number of wavelengths measured per plate
%% Read in the file:
[N T R] = xlsread(path);
% N contains the numerica data
% T contains the text data
% R contains the raw data
% Determine the number of plates:
blocks = char(T(1,1));
nPlates = str2num(blocks(11:end));
% If there are "notes", delete them:
if strcmp(T(2),'Note:')==1
% At the end of the notes section is an empty row containing '~End'. Find
% all instances of '~End':
ends = find(strcmp(T,'~End'));
%The first '~End' is at the end of the notes:
endNotes = ends(1);
N(2:endNotes,:) = [];
T(2:endNotes,:) = [];
R(2:endNotes,:) = [];
disp('Deleted notes');
nPlates = nPlates - 1;
end
%% We now have a well-structured file. Proceed:
dim = size(N);
nCols = dim(2);
% Determine the number of wavelengths per plate:
nLambdas = floor((nCols-1)/13);
% Initialize DATA
DATA = zeros(8,12,nPlates,nLambdas);
%%
for i = 1:nPlates
for j = 1:nLambdas
rowStart = 12*i-8;
colStart = 13*j-10;
DATA(:,:,i,j) = N(rowStart:rowStart+7,colStart:colStart+11);
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment