Last active
August 29, 2015 14:12
-
-
Save kkmehta/0a1dbe272d7ff2aed011 to your computer and use it in GitHub Desktop.
stanford-code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function [newRxns synModelEX] = addEcRxnToSyn(ecModel, synModel, DICT, ID, ... | |
lb, ub, transYN, verbYN) | |
% Function: addEcRxnToSyn | |
% Syntax: addEcRxnToSyn(ecModel, synModel, DICT, rxnID, lb, ub, transportYN, ... | |
% verboseYN); | |
% ------------------------------------------------------------------------ | |
% Adds the reaction specified by rxnID from the given E. coli model to the | |
% given synechocystis model. If transportYN is 1, exchange reactions will | |
% be added for all metabolites not already in the synechocystis model. If | |
% verboseYN is 1, assorted messages/warnings will be printed out as needed. | |
% DICT should be a dictionary translating E. coli metabolites to equivalent | |
% Synechocystis metabolites. lb and ub are lower and upper bounds for new | |
% reactions. | |
% | |
% returns [newRxns synModelEX], where newRxns is a list of new reactions | |
% addded, and synModelEX, the extended synechocystis model. | |
rxn = full(ecModel.S(:,ID)); | |
name = ecModel.rxns{ID}; | |
rEinds = find(rxn < 0); % indices of reactants | |
rEcoef = rxn(rEinds); % stoichiometric coefficients of reactants | |
rEname = ecModel.mets(rEinds); % metabolite names of reactants (cell array) | |
pEinds = find(rxn > 0); % indices of products | |
pEcoef = rxn(pEinds); % stoichiometric coefficients of products | |
pEname = ecModel.mets(pEinds); % metabolite names of products (cell array) | |
rSname = cell(length(rEinds),1); | |
pSname = cell(length(pEinds),1); | |
newRxns = {}; | |
synModelEX = synModel; | |
% Check if a reaction with the same name already exists in syn, and | |
% if so skip it: | |
if ~isempty(find(ismember(synModel.rxns, name), 1)) | |
if verbYN | |
disp('A reaction with the same name (%s) already exists in Syn', name); | |
end | |
return | |
end | |
% Convert E. coli metabolites to Syn by looking them up in the | |
% dictionary: | |
for j = 1:length(rEname) | |
if ~isKey(DICT, rEname{j}) | |
rSname{j} = rEname{j}; | |
% ...and add an import reaction from oblivion directly to | |
% the appropriate compartment, if it hasn't already been | |
% added: | |
if transYN | |
nameEX = sprintf('EX-IMP_%s', rEname{j}); | |
if isempty(find(ismember(newRxns, nameEX), 1)) | |
newRxns{length(newRxns)+1, 1} = nameEX; | |
metsEX = rEname{j}; | |
stocEX = 1; | |
synModelEX = addReactionTacit(synModelEX, nameEX, {metsEX}, ... | |
stocEX, 1, lb, ub, 0); | |
end | |
end | |
else | |
rSname{j} = DICT(rEname{j}); | |
end | |
end | |
for j = 1:length(pEname) | |
if ~isKey(DICT, pEname{j}) | |
pSname{j} = pEname{j}; | |
% ...and add an export reaction from the appropriate | |
% compartment directly to oblivion, if it hasn't already | |
% been added: | |
if transYN | |
nameEX = sprintf('EX-EXP_%s', pEname{j}); | |
if isempty(find(ismember(newRxns, nameEX), 1)) | |
newRxns{length(newRxns)+1, 1} = nameEX; | |
metsEX = pEname{j}; | |
stocEX = -1; | |
synModelEX = addReactionTacit(synModelEX, nameEX, {metsEX}, ... | |
stocEX, 1, lb, ub, 0); | |
end | |
end | |
else | |
pSname{j} = DICT(pEname{j}); | |
end | |
end | |
% Stoichiometry is the same: | |
rScoef = rEcoef; | |
pScoef = pEcoef; | |
mets = [rSname; pSname]; | |
stoc = [rScoef; pScoef]; | |
lbEC = ecModel.lb(ID); | |
ubEC = ecModel.ub(ID); | |
synModelEX = addReactionTacit(synModelEX, name, mets, stoc, 1, lbEC, ubEC, 0); | |
newRxns{length(newRxns)+1, 1} = name; | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function TmList = checkMisprime(primer, target, cDNA, T) | |
% | |
% function: checkMisprime | |
% syntax: checkMisprime(primer, target, cDNA, T) | |
% Kunal Mehta (kkmeht @ gmail), 2013-01-28 | |
% ---------------------------------------------- | |
% checkMisprime looks for possible mispriming of 'primer' against 'target'. | |
% The output is a plot of the melting temperature of primer against an | |
% equal-sized window on target, stepping along the entire length of target. | |
% The Tm is calculated using calc_misprime_delG_KM.m, courtesy of Chris | |
% VanLang (vanlang @ stanford) and edited by me. | |
% | |
% cDNA (the DNA concentration in M) and T (the folding temperature in ∞C) | |
% are optional; their defaults are 0.5 µM (the default concentration for | |
% Thermo Scientific's Tm calculator) and 37 ∞C (the default temperature in | |
% calc_misprime_delG.m. | |
% | |
% primer and target can be explicit sequences or names of files in FASTA | |
% format. Most extraneous characters will be ignored in sequences, but | |
% do not use a full-stop (.) in sequences entered explicitly. | |
% | |
if ~exist('cDNA', 'var'); cDNA = 5E-7; end; | |
if ~exist('T', 'var'); T = 37; end; | |
% check if primer and target are explicit or filenames: | |
if ~isempty(strfind(primer, '.')) | |
f = fopen(primer); | |
primerStr = ''; | |
while 1 | |
line = fgetl(f); | |
if line == -1; break; end | |
primerStr = [primerStr line]; | |
end | |
fclose(f); | |
primer = primerStr; | |
end | |
if ~isempty(strfind(target, '.')) | |
f = fopen(target); | |
targetStr = ''; | |
while 1 | |
line = fgetl(f); | |
if line == -1; break; end | |
targetStr = [targetStr line]; | |
end | |
fclose(f); | |
target = targetStr; | |
end | |
% clean up 'primer' and 'target': | |
primer = cleanUp(primer); | |
primer = revComp(primer); | |
target = cleanUp(target); | |
Np = length(primer); | |
nBinds = length(target) - Np + 1; % number of binding sites along target | |
TmList = zeros(1, nBinds); | |
for i = 1:nBinds | |
subSeq = target(i:i+Np-1); | |
[~, Tm] = calc_misprime_delG_KM(primer, subSeq, cDNA, T, 0); | |
if Tm == -999999999; Tm = []; end; | |
TmList(i) = Tm; | |
end | |
plot(TmList) | |
xlabel('Position on template'); | |
ylabel('Melting temperature'); | |
end | |
function seqOut = cleanUp(seqIn) | |
BASES = 'ACGTUacgtu'; | |
N = length(seqIn); | |
seqOut = blanks(N); | |
index = 1; | |
for i=1:N | |
if ~isempty(strfind(BASES, seqIn(i))) | |
seqOut(index) = seqIn(i); | |
index = index + 1; | |
end | |
end | |
while seqOut(end) == ' ' | |
seqOut(end) = ''; | |
end | |
end | |
function rc = revComp(seqIn) | |
BASE_PAIRS = containers.Map; | |
BASE_PAIRS('A') = 'T'; | |
BASE_PAIRS('C') = 'G'; | |
BASE_PAIRS('T') = 'A'; | |
BASE_PAIRS('G') = 'C'; | |
BASE_PAIRS('U') = 'A'; | |
BASE_PAIRS('a') = 't'; | |
BASE_PAIRS('c') = 'g'; | |
BASE_PAIRS('t') = 'a'; | |
BASE_PAIRS('g') = 'c'; | |
BASE_PAIRS('u') = 'a'; | |
N = length(seqIn); | |
rc = blanks(N); | |
for i=1:N | |
rc(i) = BASE_PAIRS(seqIn(abs(i-N-1))); | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Provide paths to the directories "input" (with the raw images), "crops" (for cropped photos) and "counts" (for photos with marks on each counted colony) | |
input = "" | |
crops = "" | |
counts = "" | |
list = getFileList(input); | |
for (i = 0; i < list.length; i++) { | |
open(input + list[i]); | |
run("Flip Vertically"); | |
run("Flip Horizontally"); | |
// These coordinates should describe a rectangle that just includes the plate | |
makeRectangle(555, 306, 2238, 1857); | |
run("Crop"); | |
run("16-bit"); | |
run("Subtract Background...", "rolling=50 light"); | |
saveAs("Jpeg", crops + list[i]); | |
run("8-bit"); | |
setAutoThreshold("Default"); | |
// run("Threshold..."); | |
// Adjust these limits so that colonies are included (highlighted in red) and everything else is excluded | |
setThreshold(25, 200); | |
setOption("BlackBackground", false); | |
run("Convert to Mask"); | |
run("Make Binary"); | |
run("Close-"); | |
run("Analyze Particles...", "size=20-200 circularity=0.7-1.00 show=[Overlay Masks] display summarize in_situ"); | |
saveAs("Jpeg", counts + list[i]); | |
close(); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function [seqOut, dG] = optimizeForCFPS(seqIn, CDSyn, Fpyn, NdeIyn, MIN_ENERGY, verbyn) | |
% | |
% Function: optimizeForCFPS | |
% Syntax: [seqOut dG] = optimizeForCFPS(seqIn, CDSyn, Fpyn, ... | |
% MIN_ENERGY, verbyn) | |
% ------------------------------------------------------------- | |
% Optimizes the input sequence assuming it is a 5' segment of an | |
% expression template for cell-free protein synthesis. | |
% | |
% The sequence should start at the 5' end of the mRNA transcript | |
% (omitting any terminal 5' hairpins or stem-loop structures), | |
% extend through the RBS and include the first few codons of the | |
% coding sequence. | |
% | |
% The program will search for the (most likely) ribosome binding | |
% site and start codon of the CDS. The RBS is not changed, and | |
% only silent mutations are made in the CDS. The optimal sequence | |
% is found using a Metropolis Monte-Carlo algorithm. | |
% | |
% If no changes to the coding sequence are desired, set CDSyn to | |
% 0, otherwise set it to 1. If no changes to the 5' region upstream | |
% of the CDS are desired, set Fpyn to 0, otherwise set it to 1. The | |
% program will continue until the maximum number of steps is reached | |
% (currently set at 1000 internally), or the energy of the optimized | |
% sequence is greater than MIN_ENERGY. MIN_ENERGY can be optionally | |
% set in the function call; the default is -0.5 kcal/mol | |
% | |
% (Updated 2014-04-23, Julie A Fogarty): If preservation of the NdeI | |
% site is desired, enter 1 for NdeIyn. If not, enter a 0. | |
% | |
% Set verbyn to 1 for verbose output; otherwise 0 (default 0). | |
% | |
% The Bioinformatics toolbox is required (I use rnafold to | |
% calculate the folding energy, and the geneticcode function). | |
% | |
% Kunal Mehta, kkmeht@icloud.com | |
% CONSTANTS | |
RBS = 'AGGAGGT'; | |
BASES = 'ACGT'; | |
START_CODON = 'ATG'; | |
NdeI = 'CATATG'; | |
MAX_STEPS = 2500; | |
RT = 0.6; | |
if ~exist('MIN_ENERGY','var') || isempty(MIN_ENERGY) | |
MIN_ENERGY = -0.5; | |
end | |
if ~exist('verbyn', 'var') | |
verbyn = 0; | |
end | |
% Should make sure we don't generate a new start codon anywhere | |
seqIn = cleanSeq(seqIn); | |
len = length(seqIn); | |
% Find the RBS: | |
locs = strfind(seqIn, RBS); | |
if isempty(locs) | |
fprintf('Could not find an RBS \n'); | |
RBSLoc = []; | |
else | |
RBSLoc = locs(end):locs(end) + 6; | |
if length(locs) > 1 | |
fprintf('Found more than one possible RBS, using the last one \n'); | |
end | |
if verbyn; fprintf('Found RBS at position %4i \n', RBSLoc(1)); end | |
end | |
% Find the NdeI RE site: | |
locs = strfind(seqIn, NdeI); | |
if isempty(locs) | |
fprintf('Could not find an NdeI RE site \n'); | |
NdeILoc = []; | |
else | |
NdeILoc = locs(end):locs(end) + 5; | |
if length(locs) > 1 | |
fprintf('Found more than one possible NdeI RE site, using the last one \n'); | |
end | |
if verbyn; fprintf('Found NdeI RE site at position %4i \n', NdeILoc(1)); end | |
end | |
% Find the start codon: | |
locs = strfind(seqIn, START_CODON); | |
if isempty(locs) | |
fprintf('Could not find a start codon \n'); | |
ATGLoc = []; | |
else | |
ATGLoc = locs(1); | |
if length(locs) > 1 | |
fprintf('Found more than one possible start codon, using the first one \n'); | |
end | |
if verbyn; fprintf('Found start codon at position %4i \n', ATGLoc); end | |
end | |
% Calculate the initial folding energy: | |
[~, Uinit] = rnafold(seqIn); | |
if verbyn; fprintf('The initial folding energy is %6.2f kcal/mol \n', Uinit); end | |
% No point carrying on if structure is already good enough: | |
if Uinit >= MIN_ENERGY | |
seqOut = seqIn; | |
dG = Uinit; | |
return | |
end | |
terminator = 1; | |
nSteps = 0; | |
seqCurr = seqIn; | |
Ucurr = Uinit; | |
seqBest = ''; | |
Ubest = -9999; | |
while(terminator) | |
seqTest = seqCurr; | |
% Make a random mutation: | |
if CDSyn && Fpyn; pos = randInt(0, len); | |
elseif Fpyn; pos = randInt(0, ATGLoc-1); | |
elseif CDSyn; pos = randInt(ATGLoc-1, len); | |
end | |
if ismember(pos, RBSLoc); continue; end | |
if ismember(pos, NdeILoc) && NdeIyn; continue; end | |
if pos >= ATGLoc | |
codonStart = floor((pos - ATGLoc) / 3) * 3 + ATGLoc; | |
codonRange = codonStart:codonStart + 2; | |
codon = seqCurr(codonRange); | |
newCodon = silentMutate(codon); | |
seqTest(codonRange) = newCodon; | |
if verbyn | |
fprintf('Changed codon %3s to %3s at position %3i \n', codon, newCodon, codonStart); | |
end | |
else | |
basesTemp = BASES; | |
basesTemp(strfind(BASES, seqCurr(pos))) = ''; | |
seqTest(pos) = basesTemp(randInt(0, 3)); | |
if verbyn | |
fprintf('Changed nucleotide at position %3i \n', pos); | |
end | |
end | |
% Apply the metropolis condition: | |
[~, Utest] = rnafold(seqTest); | |
if Utest > Ucurr || rand() < exp(-(Ucurr - Utest)/RT) | |
seqCurr = seqTest; | |
Ucurr = Utest; | |
if verbyn | |
fprintf(['Accepted new sequence ' seqCurr ' with energy ' num2str(Utest) '\n']); | |
end | |
if Ucurr > Ubest | |
seqBest = seqCurr; | |
Ubest = Ucurr; | |
end | |
end | |
% Check if we're done: | |
if nSteps > MAX_STEPS || Ucurr >= MIN_ENERGY | |
terminator = 0; | |
seqOut = seqBest; | |
dG = Ubest; | |
end | |
nSteps = nSteps + 1; | |
end | |
end | |
function seqOut = cleanSeq(seqIn) | |
BASES = 'ACGT'; | |
seqOut = ''; | |
seqIn = upper(seqIn); | |
for i = 1:length(seqIn) | |
if ismember(seqIn(i), BASES) | |
seqOut = [seqOut seqIn(i)]; | |
end | |
end | |
end | |
function codonOut = silentMutate(codonIn) | |
% Constants: | |
TRANSLATION_TABLE = geneticcode(1); | |
CODON_TABLE = struct(... | |
'R', {{'CGC', 'CGT'}},... | |
'L', {{'CUG'}},... | |
'S', {{'AGC', 'TCG', 'AGT', 'TCC', 'TCT'}},... | |
'A', {{'GCG', 'GCC', 'GCA'}},... | |
'G', {{'GGC', 'GGT'}},... | |
'P', {{'CCG'}},... | |
'T', {{'ACC', 'ACG'}},... | |
'V', {{'GTG', 'GTT', 'GTC'}},... | |
'I', {{'ATT', 'ATC'}},... | |
'N', {{'AAC', 'AAT'}},... | |
'D', {{'GAT'}},... | |
'C', {{'TGC', 'TGT'}},... | |
'Q', {{'CAG'}},... | |
'E', {{'GAA'}},... | |
'H', {{'CAT', 'CAC'}},... | |
'K', {{'AAA'}},... | |
'F', {{'TTT', 'TTC'}},... | |
'Y', {{'TAT', 'TAC'}},... | |
'M', {{'ATG'}},... | |
'W', {{'TGG'}}... | |
); | |
AA = TRANSLATION_TABLE.(codonIn); | |
choices = CODON_TABLE.(AA); | |
if length(choices) == 1 | |
codonOut = codonIn; | |
else | |
for i = 1:length(choices) | |
if char(choices{i}) == codonIn; choices(i) = ''; break; end; | |
end | |
codonOut = char(choices(randInt(0, length(choices)))); | |
end | |
end | |
function rint = randInt(min, max) | |
rint = ceil(rand() * (max - min) + min); | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%Dictionaries mapping E. coli metabolites to Syn metabolites: | |
DICT = DICT_exact; | |
BATCH_SIZE = 1; % # of reactions at a time | |
BOUND_L = -100; % default lower bound for new reactions | |
BOUND_U = 100; % default upper bound for new reactions | |
ecModel = EcAer; | |
synModel = synAn; | |
%%%%%%%%%%%%%%%%%%%%%%%% | |
% Initialize: | |
rxnStart = 1; | |
rxnEnd = BATCH_SIZE; | |
nRxns = length(EcAer.rxns); | |
terminator = 0; | |
changeCobraSolver('glpk'); | |
savedReactionInds = zeros(0, 2); | |
savedGrowthRates = zeros(0,1); | |
COUNTER = 1; | |
while true | |
% Check if we are at the end of the list of reactions, and set the | |
% terminator if we are: | |
if rxnEnd > nRxns | |
rxnEnd = nRxns; | |
terminator = 1; | |
end | |
synModelTest = synModel; | |
newRxns = {}; | |
for i = rxnStart : rxnEnd | |
[addedRxns synModelTest] = addEcRxnToSyn(ecModel, synModelTest, DICT, ... | |
i, BOUND_L, BOUND_U, 0, 0); | |
newRxns = [newRxns; addedRxns]; | |
end | |
% We now have the "fortified" synAn model. Check if it grows: | |
sol = optimizeCbModel(synModelTest); | |
if sol.f > 0.0001 | |
dim = size(savedReactionInds); | |
savedReactionInds(dim(1)+1, :) = [rxnStart rxnEnd]; | |
savedGrowthRates(length(savedGrowthRates)+1, 1) = sol.f; | |
end | |
% Check if we're done. If not, increment the reaction set: | |
if terminator | |
dim = size(savedReactionInds); | |
fprintf('\n %i sets of reactions that confer anaerobic growth \n', dim(1)) | |
fprintf('%6s %5s %s \n', 'Start', 'End', 'f'); | |
for j = 1:dim(1) | |
fprintf('%6i %5i %f \n', savedReactionInds(j,1), savedReactionInds(j,2), ... | |
savedGrowthRates(j)); | |
end | |
break | |
else | |
rxnStart = rxnStart + BATCH_SIZE; | |
rxnEnd = rxnEnd + BATCH_SIZE; | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function [DATA,nPlates,nLambdas] = smImport(path) | |
% [DATA,nPlates,nLambdas] = smImport(path) | |
% | |
% Imports the SoftMax Pro file located at <<path>> and stores the data in | |
% DATA, the number of plates as nPlates, and the number of wavelengths as | |
% nLambdas. | |
% | |
% For now, the function requires that the file be exported from SoftMax in | |
% "plate" format, opened in Excel and saved as an .xls file. | |
% | |
% DATA is a m x n x o x p tensor where | |
% | |
% m = 8, the number of rows in a 96-well plate | |
% n = 12, the number of columns in a 96-well plate | |
% o = the number of plates (blocks) in the file | |
% p = the number of wavelengths measured per plate | |
%% Read in the file: | |
[N T R] = xlsread(path); | |
% N contains the numerica data | |
% T contains the text data | |
% R contains the raw data | |
% Determine the number of plates: | |
blocks = char(T(1,1)); | |
nPlates = str2num(blocks(11:end)); | |
% If there are "notes", delete them: | |
if strcmp(T(2),'Note:')==1 | |
% At the end of the notes section is an empty row containing '~End'. Find | |
% all instances of '~End': | |
ends = find(strcmp(T,'~End')); | |
%The first '~End' is at the end of the notes: | |
endNotes = ends(1); | |
N(2:endNotes,:) = []; | |
T(2:endNotes,:) = []; | |
R(2:endNotes,:) = []; | |
disp('Deleted notes'); | |
nPlates = nPlates - 1; | |
end | |
%% We now have a well-structured file. Proceed: | |
dim = size(N); | |
nCols = dim(2); | |
% Determine the number of wavelengths per plate: | |
nLambdas = floor((nCols-1)/13); | |
% Initialize DATA | |
DATA = zeros(8,12,nPlates,nLambdas); | |
%% | |
for i = 1:nPlates | |
for j = 1:nLambdas | |
rowStart = 12*i-8; | |
colStart = 13*j-10; | |
DATA(:,:,i,j) = N(rowStart:rowStart+7,colStart:colStart+11); | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment