Skip to content

Instantly share code, notes, and snippets.

@lukem512
Last active July 12, 2016 10:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lukem512/5814a513842bd2f61bdd7a22755145e5 to your computer and use it in GitHub Desktop.
Save lukem512/5814a513842bd2f61bdd7a22755145e5 to your computer and use it in GitHub Desktop.
Genetic Algorithms: Final Project
% Final Project
% Luke Mitchell (lm0466)
% Main MATLAB script
function finalproj
% Retrieve mtDNA from GenBank
killerwhale = getgenbank('NC_023889');
disp(['Using ', killerwhale.Definition]);
% All organisms are vertebrates
geneticcode = 'Vertebrate Mitochondrial';
% Search all frames for ORFs
frames = [1, 2, 3, -1, -2, -3,];
% Basic statistics about the mtDNA
figure
basecount(killerwhale,'Chart', 'Pie');
title('Distribution of Nucleotide Bases');
figure
ntdensity(killerwhale);
figure
dimercount(killerwhale, 'chart', 'bar');
title('Dimer Count');
% Extract ORFs
killerwhaleorfs = find_orfs(killerwhale, geneticcode, frames);
% Display the ORFs
% display_orfs(killerwhale, killerwhaleorfs, frames);
% Translate two genes
killerwhaleCYTBnt = killerwhale.Sequence(14192:15329); % Cytochrome B
killerwhaleCOX1nt = killerwhale.Sequence(5360:6908); % Cytochrome Oxidase Subunit 1
killerwhaleCYTB = nt2aa(killerwhaleCYTBnt, 'GENETICCODE', geneticcode, 'ACGTOnly', 'false');
killerwhaleCOX1 = nt2aa(killerwhaleCOX1nt, 'GENETICCODE', geneticcode, 'ACGTOnly', 'false');
disp(['CYTB: ', killerwhaleCYTB]);
disp(['COX1: ', killerwhaleCOX1]);
% Create rooted phylogenetic trees for CYTB
data = {'Killer Whale' 'AF084061';
'Risso Dolphin' 'AF084059';
'Pygmy Killer Whale' 'AF084052';
'Pacific White-sided Dolphin' 'AF084067';
'Dusky Dolphin' 'AY257161';
'Guiana Dolphin' 'DQ086828';
'Melon-headed Whale' 'AF084053';
'Bottlenosed Dolphin' 'AF084095';
'Pantropical Spotted Dolphin' 'AY257161';
};
for ind = 1:length(data)
dolphins(ind).Header = data{ind, 1};
dolphins(ind).Sequence = getgenbank(data{ind, 2}, 'sequenceonly', 'true');
end
% Perform multiple alignment
ma = multialign(dolphins);
seqalignviewer(ma)
% Perform global alignment
pacificwhite = dolphins(4);
dusky = dolphins(5);
pantropic = dolphins(6);
[score, alignment] = nwalign(pantropic, dusky, 'ALPHABET', 'NT', 'SCORINGMATRIX', 'PAM50');
disp(['Pantropic Dolphin and Dusky Dolphin: ', num2str(score)]);
showalignment(alignment);
[score, alignment] = nwalign(pantropic, pacificwhite, 'ALPHABET', 'NT', 'SCORINGMATRIX', 'PAM50');
disp(['Pantropic Dolphin and Pacific White Dolphin: ', num2str(score)]);
showalignment(alignment);
[score, alignment] = nwalign(dusky, pacificwhite, 'ALPHABET', 'NT', 'SCORINGMATRIX', 'PAM50');
disp(['Dusky Dolphin and Pacific White Dolphin: ', num2str(score)]);
showalignment(alignment);
% Add an outgroup
% Bald Eagle cytochrome b, partial (mitochondrion)
expanded = dolphins;
ind = numel(expanded)+1;
expanded(ind).Header = 'Bald Eagle';
expanded(ind).Sequence = getgenbank('AY987316', 'sequenceonly', 'true');
% Re-root the tree
[tree, cytbntd] = make_phylogenetic_tree(expanded, 'NT');
outgroup = getbyname(tree, 'Bald Eagle');
reroot(tree, outgroup);
plot(tree);
title('Re-rooted Distance Tree of CYTB Nucelotides from Dolphins');
xlabel('Evolutionary distance')
% Convert to amino acids and repeat
for ind = 1:numel(expanded)
expanded(ind).Sequence = nt2aa(expanded(ind).Sequence, 'GENETICCODE', geneticcode, 'ACGTOnly', 'false');
end
[tree, cytbaad] = make_phylogenetic_tree(expanded, 'AA');
outgroup = getbyname(tree, 'Bald Eagle');
reroot(tree, outgroup);
plot(tree);
title('Re-rooted Distance Tree of CYTB Amino Acids from Dolphins');
xlabel('Evolutionary distance');
% Consensus tree
weights = [sum(cytbntd) sum(cytbaad)];
weights = weights / sum(weights);
dist = cytbntd .* weights(1) + cytbaad .* weights(2);
tree_cytb = seqneighjoin(dist,'equivar', expanded);
plot(tree_cytb,'type','angular');
title('Consensus Tree using CYTB from Dolphins')
% Create rooted phylogenetic trees for COX1
data = {'Killer Whale' 'EU496323';
'Risso Dolphin' 'EU496298';
'Pygmy Killer Whale' 'EU496290';
'Pacific White-sided Dolphin' 'EU496357';
'Melon-headed Whale' 'EU496294';
'Bottlenosed Dolphin' 'EU496329';
'Pantropical Spotted Dolphin' 'EU496353';
};
dolphins = [];
for ind = 1:length(data)
dolphins(ind).Header = data{ind, 1};
dolphins(ind).Sequence = getgenbank(data{ind, 2}, 'sequenceonly', 'true');
end
% Perform multiple alignment
ma = multialign(dolphins);
seqalignviewer(ma)
% Create trees
expanded = dolphins;
ind = numel(expanded)+1;
expanded(ind).Header = 'Bald Eagle';
expanded(ind).Sequence = getgenbank('KF525373', 'sequenceonly', 'true');
[tree, cox1ntd] = make_phylogenetic_tree(expanded, 'NT');
outgroup = getbyname(tree, 'Bald Eagle');
reroot(tree, outgroup);
plot(tree);
title('Re-rooted Distance Tree of COX1 Nucleotides from Dolphins');
xlabel('Evolutionary distance');
for ind = 1:numel(expanded)
expanded(ind).Sequence = nt2aa(expanded(ind).Sequence, 'GENETICCODE', geneticcode, 'ACGTOnly', 'false');
end
[tree, cox1aad] = make_phylogenetic_tree(expanded, 'AA');
outgroup = getbyname(tree, 'Bald Eagle');
reroot(tree, outgroup);
plot(tree);
title('Re-rooted Distance Tree of COX1 Amino Acids from Dolphins');
xlabel('Evolutionary distance');
end
% Create a phylogenetic tree
function [tree, dist] = make_phylogenetic_tree(seqs, alphabet)
% Create a multiple alignment from the genes
ma = multialign(seqs);
% View the alignment
% seqalignviewer(ma);
% Compute the pairwise-distances using the Jukes-Cantor correction
dist = seqpdist(ma, 'METHOD', 'Jukes-Cantor', 'ALPHABET', alphabet);
% Build the phylogenetic tree using the Neighbor-Joining algorithm
tree = seqneighjoin(dist, 'equivar', ma);
end
% Find open reading frames for a specified mtDNA
function orfs = find_orfs(mt, geneticcode, frames)
% Create a random permutation of the sequence
randmt = randseq(length(mt.Sequence), 'FROMSTRUCTURE', basecount(mt));
% Find all ORFs from both random and original sequence
randorfs = seqshoworfs(randmt, 'GENETICCODE', geneticcode, 'MINIMUMLENGTH', 1, 'FRAMES', frames);
orfs = seqshoworfs(mt, 'GENETICCODE', geneticcode, 'MINIMUMLENGTH', 1, 'FRAMES', frames);
% Find the lengths of all ORFs
ORFLength = [];
randORFLength = [];
for i = 1:length(frames)
% Original sequence
start = orfs(i).Start;
stop = orfs(i).Stop;
for j = 1:length(start)
if numel(stop) >= j
ORFLength = [ORFLength; stop(j) - start(j)];
end
end
% Random sequence
start = randorfs(i).Start;
stop = randorfs(i).Stop;
for j = 1:length(start)
if numel(stop) >= j
randORFLength = [randORFLength; stop(j) - start(j)];
end
end
end
% Compare random and original ORFs
max_threshold = max(randORFLength);
n_max = length(find(ORFLength >= max_threshold));
empirical_threshold = ceil(prctile(randORFLength, 95));
n_empirical = length(find(ORFLength >= empirical_threshold));
disp(['Using minimum length of ' num2str(empirical_threshold)])
% Find the open reading frames
orfs = seqshoworfs(mt, 'GENETICCODE', geneticcode, 'MINIMUMLENGTH', empirical_threshold, 'FRAMES', frames);
end
% Display the ORF data for each specified frame
function display_orfs(mt, orfs, frames)
for i = 1:length(frames)
disp(['Frame ' num2str(frames(i))]);
start = orfs(i).Start;
stop = orfs(i).Stop;
for j = 1:length(start)
disp([num2str(start(j)) ' to ' num2str(stop(j)) ': ' mt.Sequence(start(j):stop(j))])
end
disp(' ')
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment