Last active
July 12, 2016 10:25
-
-
Save lukem512/5814a513842bd2f61bdd7a22755145e5 to your computer and use it in GitHub Desktop.
Genetic Algorithms: Final Project
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% Final Project | |
% Luke Mitchell (lm0466) | |
% Main MATLAB script | |
function finalproj | |
% Retrieve mtDNA from GenBank | |
killerwhale = getgenbank('NC_023889'); | |
disp(['Using ', killerwhale.Definition]); | |
% All organisms are vertebrates | |
geneticcode = 'Vertebrate Mitochondrial'; | |
% Search all frames for ORFs | |
frames = [1, 2, 3, -1, -2, -3,]; | |
% Basic statistics about the mtDNA | |
figure | |
basecount(killerwhale,'Chart', 'Pie'); | |
title('Distribution of Nucleotide Bases'); | |
figure | |
ntdensity(killerwhale); | |
figure | |
dimercount(killerwhale, 'chart', 'bar'); | |
title('Dimer Count'); | |
% Extract ORFs | |
killerwhaleorfs = find_orfs(killerwhale, geneticcode, frames); | |
% Display the ORFs | |
% display_orfs(killerwhale, killerwhaleorfs, frames); | |
% Translate two genes | |
killerwhaleCYTBnt = killerwhale.Sequence(14192:15329); % Cytochrome B | |
killerwhaleCOX1nt = killerwhale.Sequence(5360:6908); % Cytochrome Oxidase Subunit 1 | |
killerwhaleCYTB = nt2aa(killerwhaleCYTBnt, 'GENETICCODE', geneticcode, 'ACGTOnly', 'false'); | |
killerwhaleCOX1 = nt2aa(killerwhaleCOX1nt, 'GENETICCODE', geneticcode, 'ACGTOnly', 'false'); | |
disp(['CYTB: ', killerwhaleCYTB]); | |
disp(['COX1: ', killerwhaleCOX1]); | |
% Create rooted phylogenetic trees for CYTB | |
data = {'Killer Whale' 'AF084061'; | |
'Risso Dolphin' 'AF084059'; | |
'Pygmy Killer Whale' 'AF084052'; | |
'Pacific White-sided Dolphin' 'AF084067'; | |
'Dusky Dolphin' 'AY257161'; | |
'Guiana Dolphin' 'DQ086828'; | |
'Melon-headed Whale' 'AF084053'; | |
'Bottlenosed Dolphin' 'AF084095'; | |
'Pantropical Spotted Dolphin' 'AY257161'; | |
}; | |
for ind = 1:length(data) | |
dolphins(ind).Header = data{ind, 1}; | |
dolphins(ind).Sequence = getgenbank(data{ind, 2}, 'sequenceonly', 'true'); | |
end | |
% Perform multiple alignment | |
ma = multialign(dolphins); | |
seqalignviewer(ma) | |
% Perform global alignment | |
pacificwhite = dolphins(4); | |
dusky = dolphins(5); | |
pantropic = dolphins(6); | |
[score, alignment] = nwalign(pantropic, dusky, 'ALPHABET', 'NT', 'SCORINGMATRIX', 'PAM50'); | |
disp(['Pantropic Dolphin and Dusky Dolphin: ', num2str(score)]); | |
showalignment(alignment); | |
[score, alignment] = nwalign(pantropic, pacificwhite, 'ALPHABET', 'NT', 'SCORINGMATRIX', 'PAM50'); | |
disp(['Pantropic Dolphin and Pacific White Dolphin: ', num2str(score)]); | |
showalignment(alignment); | |
[score, alignment] = nwalign(dusky, pacificwhite, 'ALPHABET', 'NT', 'SCORINGMATRIX', 'PAM50'); | |
disp(['Dusky Dolphin and Pacific White Dolphin: ', num2str(score)]); | |
showalignment(alignment); | |
% Add an outgroup | |
% Bald Eagle cytochrome b, partial (mitochondrion) | |
expanded = dolphins; | |
ind = numel(expanded)+1; | |
expanded(ind).Header = 'Bald Eagle'; | |
expanded(ind).Sequence = getgenbank('AY987316', 'sequenceonly', 'true'); | |
% Re-root the tree | |
[tree, cytbntd] = make_phylogenetic_tree(expanded, 'NT'); | |
outgroup = getbyname(tree, 'Bald Eagle'); | |
reroot(tree, outgroup); | |
plot(tree); | |
title('Re-rooted Distance Tree of CYTB Nucelotides from Dolphins'); | |
xlabel('Evolutionary distance') | |
% Convert to amino acids and repeat | |
for ind = 1:numel(expanded) | |
expanded(ind).Sequence = nt2aa(expanded(ind).Sequence, 'GENETICCODE', geneticcode, 'ACGTOnly', 'false'); | |
end | |
[tree, cytbaad] = make_phylogenetic_tree(expanded, 'AA'); | |
outgroup = getbyname(tree, 'Bald Eagle'); | |
reroot(tree, outgroup); | |
plot(tree); | |
title('Re-rooted Distance Tree of CYTB Amino Acids from Dolphins'); | |
xlabel('Evolutionary distance'); | |
% Consensus tree | |
weights = [sum(cytbntd) sum(cytbaad)]; | |
weights = weights / sum(weights); | |
dist = cytbntd .* weights(1) + cytbaad .* weights(2); | |
tree_cytb = seqneighjoin(dist,'equivar', expanded); | |
plot(tree_cytb,'type','angular'); | |
title('Consensus Tree using CYTB from Dolphins') | |
% Create rooted phylogenetic trees for COX1 | |
data = {'Killer Whale' 'EU496323'; | |
'Risso Dolphin' 'EU496298'; | |
'Pygmy Killer Whale' 'EU496290'; | |
'Pacific White-sided Dolphin' 'EU496357'; | |
'Melon-headed Whale' 'EU496294'; | |
'Bottlenosed Dolphin' 'EU496329'; | |
'Pantropical Spotted Dolphin' 'EU496353'; | |
}; | |
dolphins = []; | |
for ind = 1:length(data) | |
dolphins(ind).Header = data{ind, 1}; | |
dolphins(ind).Sequence = getgenbank(data{ind, 2}, 'sequenceonly', 'true'); | |
end | |
% Perform multiple alignment | |
ma = multialign(dolphins); | |
seqalignviewer(ma) | |
% Create trees | |
expanded = dolphins; | |
ind = numel(expanded)+1; | |
expanded(ind).Header = 'Bald Eagle'; | |
expanded(ind).Sequence = getgenbank('KF525373', 'sequenceonly', 'true'); | |
[tree, cox1ntd] = make_phylogenetic_tree(expanded, 'NT'); | |
outgroup = getbyname(tree, 'Bald Eagle'); | |
reroot(tree, outgroup); | |
plot(tree); | |
title('Re-rooted Distance Tree of COX1 Nucleotides from Dolphins'); | |
xlabel('Evolutionary distance'); | |
for ind = 1:numel(expanded) | |
expanded(ind).Sequence = nt2aa(expanded(ind).Sequence, 'GENETICCODE', geneticcode, 'ACGTOnly', 'false'); | |
end | |
[tree, cox1aad] = make_phylogenetic_tree(expanded, 'AA'); | |
outgroup = getbyname(tree, 'Bald Eagle'); | |
reroot(tree, outgroup); | |
plot(tree); | |
title('Re-rooted Distance Tree of COX1 Amino Acids from Dolphins'); | |
xlabel('Evolutionary distance'); | |
end | |
% Create a phylogenetic tree | |
function [tree, dist] = make_phylogenetic_tree(seqs, alphabet) | |
% Create a multiple alignment from the genes | |
ma = multialign(seqs); | |
% View the alignment | |
% seqalignviewer(ma); | |
% Compute the pairwise-distances using the Jukes-Cantor correction | |
dist = seqpdist(ma, 'METHOD', 'Jukes-Cantor', 'ALPHABET', alphabet); | |
% Build the phylogenetic tree using the Neighbor-Joining algorithm | |
tree = seqneighjoin(dist, 'equivar', ma); | |
end | |
% Find open reading frames for a specified mtDNA | |
function orfs = find_orfs(mt, geneticcode, frames) | |
% Create a random permutation of the sequence | |
randmt = randseq(length(mt.Sequence), 'FROMSTRUCTURE', basecount(mt)); | |
% Find all ORFs from both random and original sequence | |
randorfs = seqshoworfs(randmt, 'GENETICCODE', geneticcode, 'MINIMUMLENGTH', 1, 'FRAMES', frames); | |
orfs = seqshoworfs(mt, 'GENETICCODE', geneticcode, 'MINIMUMLENGTH', 1, 'FRAMES', frames); | |
% Find the lengths of all ORFs | |
ORFLength = []; | |
randORFLength = []; | |
for i = 1:length(frames) | |
% Original sequence | |
start = orfs(i).Start; | |
stop = orfs(i).Stop; | |
for j = 1:length(start) | |
if numel(stop) >= j | |
ORFLength = [ORFLength; stop(j) - start(j)]; | |
end | |
end | |
% Random sequence | |
start = randorfs(i).Start; | |
stop = randorfs(i).Stop; | |
for j = 1:length(start) | |
if numel(stop) >= j | |
randORFLength = [randORFLength; stop(j) - start(j)]; | |
end | |
end | |
end | |
% Compare random and original ORFs | |
max_threshold = max(randORFLength); | |
n_max = length(find(ORFLength >= max_threshold)); | |
empirical_threshold = ceil(prctile(randORFLength, 95)); | |
n_empirical = length(find(ORFLength >= empirical_threshold)); | |
disp(['Using minimum length of ' num2str(empirical_threshold)]) | |
% Find the open reading frames | |
orfs = seqshoworfs(mt, 'GENETICCODE', geneticcode, 'MINIMUMLENGTH', empirical_threshold, 'FRAMES', frames); | |
end | |
% Display the ORF data for each specified frame | |
function display_orfs(mt, orfs, frames) | |
for i = 1:length(frames) | |
disp(['Frame ' num2str(frames(i))]); | |
start = orfs(i).Start; | |
stop = orfs(i).Stop; | |
for j = 1:length(start) | |
disp([num2str(start(j)) ' to ' num2str(stop(j)) ': ' mt.Sequence(start(j):stop(j))]) | |
end | |
disp(' ') | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment