Created
January 31, 2014 16:04
-
-
Save meren/8735046 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(reshape) | |
library(reshape2) | |
library(RColorBrewer) | |
library("vegan") | |
library(gtools) | |
library(grid) | |
library(gridBase) | |
library(gridExtra) | |
library(Biostrings) | |
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { | |
require(grid) | |
# Make a list from the ... arguments and plotlist | |
plots <- c(list(...), plotlist) | |
numPlots = length(plots) | |
# If layout is NULL, then use 'cols' to determine layout | |
if (is.null(layout)) { | |
# Make the panel | |
# ncol: Number of columns of plots | |
# nrow: Number of rows needed, calculated from # of cols | |
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), | |
ncol = cols, nrow = ceiling(numPlots/cols)) | |
} | |
if (numPlots==1) { | |
print(plots[[1]]) | |
} else { | |
# Set up the page | |
grid.newpage() | |
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) | |
# Make each plot, in the correct location | |
for (i in 1:numPlots) { | |
# Get the i,j matrix positions of the regions that contain this subplot | |
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) | |
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, | |
layout.pos.col = matchidx$col)) | |
} | |
} | |
} | |
PLOT_SITE <- function(df, genera, site = 'TH', metric='horn', method='ward'){ | |
# get the subset for genera and site | |
df_sub_genera = df[df$genus %in% genera, ] | |
df_sub_site <- df_sub_genera[df_sub_genera$site == site, ] | |
sub_matrix <- acast(df_sub_site, sample~oligo, value.var="abundance", fill = 0) | |
distance <- metric #"manhattan", "euclidean", "canberra", "bray", "kulczynski", "jaccard", "gower", "morisita", "horn", "mountford", "raup" , "binomial" or "chao" | |
d <- vegdist(sub_matrix, method=distance) | |
fit <- hclust(d, method=method) # "ward", "single", "complete", "average", "mcquitty", "median" or "centroid" | |
samples_order <- fit$labels[fit$order] | |
# set the order of samples based on clustering results: | |
df_sub_site$sample <- ordered(df_sub_site$sample,levels=samples_order) | |
# oh, very cool | |
df_sub_site$oligo <- reorder(df_sub_site$oligo, df_sub_site$abundance, FUN=sum) | |
df_sub_site$oligo <- factor(df_sub_site$oligo, levels=levels(df_sub_site$oligo)) | |
df_sub_site <- df_sub_site[order(df_sub_site$oligo), ] | |
# prepare ggplot object | |
p <- ggplot(df_sub_site, aes(x=factor(sample), y=abundance, fill = oligo)) | |
p <- p + geom_bar(position="fill", stat = "identity") | |
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), axis.ticks.y = element_blank(), legend.position = 'none', panel.grid.major = element_blank(), panel.grid.minor = element_blank()) | |
p <- p + labs(x='', y=paste(genera, collapse=' / ')) | |
p <- p + scale_y_continuous(breaks = NULL) | |
# color manually: | |
colors <- sample(rainbow(length(levels(factor(df_sub_site$oligo))), s = 0.5, v = 0.7)) | |
p <- p + scale_fill_manual(limits = levels(factor(df_sub_site$oligo)), values = colors) | |
print(p) | |
# | |
# # start plotting | |
# #pdf('SINGLE_SITE.pdf', width=16, height=16) | |
# plot.new() | |
# gl <- grid.layout(nrow=2, ncol=1) | |
# vp.1 <- viewport(layout.pos.col=1, layout.pos.row=1) | |
# vp.2 <- viewport(layout.pos.col=1, layout.pos.row=2) | |
# pushViewport(viewport(layout=gl)) | |
# pushViewport(vp.1) | |
# par(new=TRUE) | |
# plot(fit, cex = 0.7, sub = paste("Distance metric: ",distance,sep=""), xlab = '') # display dendogram | |
# popViewport() | |
# pushViewport(vp.2) | |
# print(p, newpage = FALSE) | |
# popViewport() | |
# #dev.off() | |
} | |
PLOT_GENUS <- function(df, genera, site='TH', metric='horn', method='ward'){ | |
# get genus | |
df_sub_genus <- df[df$genus %in% genera, ] | |
# get clustering based on SITE | |
df_sub_genus_sub_site <- df_sub_genus[df_sub_genus$site == site, ] | |
sub_matrix <- acast(df_sub_genus_sub_site, sample~oligo, value.var="abundance", fill = 0) | |
distance <- metric #"manhattan", "euclidean", "canberra", "bray", "kulczynski", "jaccard", "gower", "morisita", "horn", "mountford", "raup" , "binomial" or "chao" | |
d <- vegdist(sub_matrix, method=distance) | |
fit <- hclust(d, method=method) # "ward", "single", "complete", "average", "mcquitty", "median" or "centroid" | |
samples_order <- fit$labels[fit$order] | |
# set the order of samples based on clustering results: | |
df_sub_genus$sample <- ordered(df_sub_genus$sample,levels=samples_order) | |
# oh, very cool | |
df_sub_genus$oligo <- reorder(df_sub_genus$oligo, df_sub_genus$abundance, FUN=sum) | |
df_sub_genus$oligo <- factor(df_sub_genus$oligo, levels=levels(df_sub_genus$oligo)) | |
df_sub_genus <- df_sub_genus[order(df_sub_genus$oligo), ] | |
p <- ggplot(df_sub_genus, aes(x=factor(sample), y=abundance, factor(oligo), color, fill = oligo)) | |
p <- p + geom_bar(position="fill", stat = "identity", width=0.99) | |
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), axis.ticks.y = element_blank(), legend.position = 'none', panel.grid.major = element_blank(), panel.grid.minor = element_blank()) | |
p <- p + labs(x='', y=length(genera)) | |
p <- p + scale_y_continuous(breaks = NULL) | |
p <- p + facet_grid(site ~ .) | |
# color manually: | |
colors <- sample(rainbow(length(levels(factor(df_sub_genus$oligo))), s = 0.5, v = 0.7)) | |
p <- p + scale_fill_manual(limits = levels(factor(df_sub_genus$oligo)), values = colors) | |
print(p) | |
# | |
# plot.new() | |
# gl <- grid.layout(nrow=2, ncol=1) | |
# vp.1 <- viewport(layout.pos.col=1, layout.pos.row=1) | |
# vp.2 <- viewport(layout.pos.col=1, layout.pos.row=2) | |
# pushViewport(viewport(layout=gl)) | |
# pushViewport(vp.1) | |
# | |
# par(new=TRUE) | |
# plot(fit, cex = 0.7, sub = paste("Distance metric: ",distance,sep=""), xlab = '') # display dendogram | |
# popViewport() | |
# pushViewport(vp.2) | |
# print(p, newpage = FALSE) | |
# popViewport() | |
} | |
PLOT_OLIGO <- function(df, oligo, site = 'TH', metric='horn', method='average', title = 'no title', labels = NA){ | |
# get the subset for genera and site | |
df_sub_genera = df[df$oligo %in% oligo, ] | |
df_sub_site <- df_sub_genera[df_sub_genera$site == site, ] | |
sub_matrix <- acast(df_sub_site, sample~oligo, value.var="abundance", fill = 0) | |
distance <- metric #"manhattan", "euclidean", "canberra", "bray", "kulczynski", "jaccard", "gower", "morisita", "horn", "mountford", "raup" , "binomial" or "chao" | |
d <- vegdist(sub_matrix, method=distance) | |
fit <- hclust(d, method=method) # "ward", "single", "complete", "average", "mcquitty", "median" or "centroid" | |
samples_order <- fit$labels[fit$order] | |
# set the order of samples based on clustering results: | |
df_sub_site$sample <- ordered(df_sub_site$sample,levels=samples_order) | |
# oh, very cool | |
df_sub_site$oligo <- reorder(df_sub_site$oligo, df_sub_site$abundance, FUN=sum) | |
df_sub_site$oligo <- factor(df_sub_site$oligo, levels=levels(df_sub_site$oligo)) | |
df_sub_site <- df_sub_site[order(df_sub_site$oligo), ] | |
# replace oligos with labels in legend | |
if(!is.na(labels)) | |
df_sub_site$oligo <- factor(df_sub_site$oligo, levels = oligo, labels = labels) | |
# color manually: | |
colors <- sample(rainbow(length(levels(factor(df_sub_site$oligo))), s = 0.5, v = 0.7)) | |
# prepare ggplot object | |
p <- ggplot(df_sub_site, aes(x=factor(sample), y=abundance, fill = oligo)) | |
p <- p + labs(title=title) | |
p <- p + geom_bar(position="fill", stat = "identity") | |
p <- p + theme(axis.text.x = element_blank(), axis.ticks.y = element_blank(), axis.text.y = element_blank(), axis.ticks.x = element_blank(), legend.position = 'top', panel.grid.major = element_blank(), panel.grid.minor = element_blank()) | |
p <- p + guides(fill = guide_legend(nrow = 2, title.position = NULL)) | |
p <- p + scale_y_continuous() | |
p <- p + labs(x='', y="Ratio of oligotypes") | |
p <- p + scale_fill_manual(limits = levels(factor(df_sub_site$oligo)), values = colors) | |
# prepare ggplot object | |
q <- ggplot(df_sub_site, aes(x=factor(sample), y=abundance)) | |
q <- q + stat_summary(fun.y = sum, geom = "bar") | |
q <- q + theme(axis.text.x = element_text(angle = 90, size=10, vjust=0, face = 'bold'), legend.position = 'none', axis.ticks.x = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) | |
q <- q + labs(x='', y="Number of reads (square root normalized)") | |
q <- q + scale_y_reverse() | |
q <- q + scale_fill_manual(limits = levels(factor(df_sub_site$oligo)), values = colors) | |
# MAKE SURE THEY HAVE THE SAME WIDTH: | |
gP <- ggplotGrob(p) | |
gQ <- ggplotGrob(q) | |
maxWidth = grid::unit.pmax(gP$widths[2:5], gQ$widths[2:5]) | |
gP$widths[2:5] <- as.list(maxWidth) | |
gQ$widths[2:5] <- as.list(maxWidth) | |
grid.arrange(gP, gQ, ncol=1) | |
} | |
PLOT_OLIGO_PER_SITE_BOXES <- function(df, oligo, sites = c('BM', 'HP', 'KG', 'PT', 'ST', 'SUBP', 'SUPP', 'SV', 'TD', 'TH'), title = 'no title', labels = NA){ | |
df_sub_genus <- df[df$oligo %in% oligo, ] | |
df_sub_genus <- df_sub_genus[df_sub_genus$site %in% sites, ] | |
dfx <- data.frame(sample=character(), | |
site=character(), | |
oligo=character(), | |
abundance=numeric(), | |
abundance_p=numeric(), | |
stringsAsFactors=FALSE) | |
names(dfx) <- c('sample', 'site', 'oligo', 'abundance', 'abundance_p') | |
N <- 0 | |
for(sample in unique(df_sub_genus$sample)){ | |
sample_reduced <- df_sub_genus[df_sub_genus$sample == sample, ] | |
for (site in sites){ | |
site_reduced <- sample_reduced[sample_reduced$site == site, ] | |
total_reads_in_site <- sum(site_reduced$abundance) | |
for(oligo_in_site in site_reduced$oligo){ | |
N <- N + 1 | |
oligo_reduced <- site_reduced[site_reduced$oligo == oligo_in_site, ] | |
oligo_in_site_abundance <- oligo_reduced$abundance | |
dfx[N, ] <- c(sample = sample, site=site, oligo = oligo_in_site, abundance = oligo_in_site_abundance, abundance_p = oligo_in_site_abundance * 100.0 / total_reads_in_site) | |
} | |
} | |
} | |
dfx$abundance_p <- as.numeric(as.character(dfx$abundance_p)) | |
# replace oligos with labels in legend | |
if(!is.na(labels)) | |
dfx$oligo <- factor(dfx$oligo, levels = oligo, labels = labels) | |
p <- ggplot(dfx, aes(x=factor(site), y=abundance_p, color = oligo)) | |
p <- p + geom_boxplot() | |
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), axis.ticks.y = element_blank(), legend.position = 'right', panel.grid.major = element_blank(), panel.grid.minor = element_blank()) | |
p <- p + labs(title=title) | |
# color manually: | |
colors <- sample(rainbow(length(levels(factor(df_sub_genus$oligo))), s = 0.5, v = 0.7)) | |
p <- p + scale_fill_manual(limits = levels(factor(df_sub_genus$oligo)), values = colors) | |
print(p) | |
} | |
PLOT_OLIGO_PER_SITE_BOXES_PERCENTS <- function(df, oligo, sites = c('BM', 'HP', 'KG', 'PT', 'ST', 'SUBP', 'SUPP', 'SV', 'TD', 'TH'), title = 'no title', labels = NA){ | |
df_sub_genus <- df[df$oligo %in% oligo, ] | |
df_sub_genus <- df_sub_genus[df_sub_genus$site %in% sites, ] | |
dfx <- data.frame(sample=character(), | |
site=character(), | |
oligo=character(), | |
abundance=numeric(), | |
abundance_p=numeric(), | |
stringsAsFactors=FALSE) | |
names(dfx) <- c('sample', 'site', 'oligo', 'abundance', 'abundance_p') | |
N <- 0 | |
for(sample in unique(df_sub_genus$sample)){ | |
sample_reduced <- df_sub_genus[df_sub_genus$sample == sample, ] | |
for (site in sites){ | |
df_site <- df[df$site == site, ] | |
df_site_sample <- df_site[df_site$sample == sample, ] | |
tot_num_reads_in_site <- sum(df_site_sample$abundance) | |
site_reduced <- sample_reduced[sample_reduced$site == site, ] | |
total_reads_in_site <- sum(site_reduced$abundance) | |
for(oligo_in_site in site_reduced$oligo){ | |
oligo_reduced <- site_reduced[site_reduced$oligo == oligo_in_site, ] | |
oligo_in_site_abundance <- oligo_reduced$abundance | |
oligo_in_site_abundance_p <- oligo_in_site_abundance * 100.0 / tot_num_reads_in_site | |
N <- N + 1 | |
dfx[N, ] <- c(sample = sample, site=site, oligo = oligo_in_site, abundance = oligo_in_site_abundance, abundance_p = oligo_in_site_abundance_p) | |
} | |
} | |
} | |
dfx$abundance_p <- as.numeric(as.character(dfx$abundance_p)) | |
outlier_limit = boxplot.stats(dfx$abundance_p)$stats[c(1, 5)][2] | |
#dfx <- dfx[dfx$abundance_p < outlier_limit, ] | |
p <- ggplot(dfx, aes(x=factor(site), y=abundance_p, color = site)) | |
p <- p + geom_boxplot(outlier.shape = NA) | |
p <- p + geom_jitter(position = position_jitter(width = .05, height = 0), size=3, alpha=0.4) | |
p <- p + theme(axis.text.y = element_text(angle = 90, hjust = 0.5, vjust = 0.5), axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5), axis.ticks.x = element_blank(), legend.position = 'none', panel.grid.major = element_blank(), panel.grid.minor = element_blank()) | |
p <- p + ylab(title) | |
p <- p + xlab('') | |
p <- p + scale_y_sqrt() | |
return(p) | |
} | |
PLOT_OLIGO_PER_SITE_STACKBAR <- function(df, oligo, sites = c('BM', 'HP', 'KG', 'PT', 'ST', 'SUBP', 'SUPP', 'SV', 'TD', 'TH'), title = 'no title', labels = NA, colors = NA){ | |
# test variables: | |
#oligo = c('Gammaproteobacteria_ATAGAATTTGTTCTC', 'Gammaproteobacteria_ATAGAATTTGATCTC', 'Gammaproteobacteria_ATAAAGTTTGACCTC', 'Gammaproteobacteria_ATAGAGTTTGATCTC', 'Gammaproteobacteria_ATAGAGTTTGACCTC', 'Gammaproteobacteria_ACAGAATTTAATCTC', 'Gammaproteobacteria_ATAGAATATAATCTC', 'Gammaproteobacteria_ATAGAATTTAGTCTC', 'Gammaproteobacteria_ATAGAATTTAATCTC', 'Gammaproteobacteria_ATAGAGTATAATCTC', 'Gammaproteobacteria_ATAGGGTTTAACCTC', 'Gammaproteobacteria_AAAGAGTTTAACCTC', 'Gammaproteobacteria_ATAGAGTTTAATCTC', 'Gammaproteobacteria_ATAGAGTTTAACCTC') | |
#labels = c('T. aromaticivorans V5 98.7', 'T. aromaticivorans V4 99.2', 'T. aromaticivorans V3 99.6', 'T. aromaticivorans V2 99.6', 'T. aromaticivorans V1 100', 'H. parainfluenzae V9 98.7', 'H. parainfluenzae V8 98.7', 'H. parainfluenzae V7 98.7', 'H. parainfluenzae V6 99.2', 'H. parainfluenzae V5 99.2', 'H. parainfluenzae V4 99.6', 'H. parainfluenzae V3 99.6', 'H. parainfluenzae V2 99.6', 'H. parainfluenzae V1 100') | |
#title = 'Distribution of T. aromaticivorans and H. parainfluenzae' | |
#sites = c( 'SUBP', 'SUPP','BM', 'HP', 'KG', 'PT', 'SV', 'TD', 'TH') | |
df_sub_genus <- df[df$oligo %in% oligo, ] | |
df_sub_genus <- df_sub_genus[df_sub_genus$site %in% sites, ] | |
dfx <- data.frame( | |
site=character(), | |
oligo=character(), | |
abundance=numeric(), | |
stringsAsFactors=FALSE) | |
names(dfx) <- c('site', 'oligo', 'abundance') | |
N <- 0 | |
for (site in sites){ | |
site_reduced <- df_sub_genus[df_sub_genus$site == site, ] | |
for(oligo_in_site in oligo){ | |
N <- N + 1 | |
oligo_reduced <- site_reduced[site_reduced$oligo == oligo_in_site, ] | |
oligo_in_site_abundance <- sum(oligo_reduced$abundance) | |
dfx[N, ] <- c(site=site, oligo = oligo_in_site, abundance = oligo_in_site_abundance) | |
} | |
} | |
# replace oligos with labels in legend | |
if(!is.na(labels)){ | |
dfx$oligo <- factor(dfx$oligo, levels = oligo, labels = labels) | |
} | |
dfx$abundance <- as.numeric(as.character(dfx$abundance)) | |
dfx$oligo <- factor(dfx$oligo) | |
p <- ggplot(dfx, aes(x=factor(site), y=abundance, group= oligo, fill = oligo)) | |
p <- p + geom_bar(position="fill", stat = "identity", width=0.90, colour = 'black') | |
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), axis.ticks.y = element_blank(), legend.position = 'right', panel.grid.major = element_blank(), panel.grid.minor = element_blank()) | |
p <- p + labs(title=title) | |
# color manually: | |
if(is.na(colors)) | |
colors <- sample(rainbow(length(levels(dfx$oligo)), s = 0.7, v = 0.9)) | |
p <- p + scale_fill_manual(limits = levels(dfx$oligo), values = colors) | |
print(p) | |
return(colors) | |
} | |
PLOT_OLIGO_EVERYWHERE <- function(df, oligo, cluster_site='TH', sites = c('BM', 'HP', 'KG', 'PT', 'ST', 'SUBP', 'SUPP', 'SV', 'TD', 'TH'), metric='horn', method='average', title = 'no title', labels = NA, colors = NA){ | |
# df=df_v1v3 | |
# oligo= man_oligos_v1v3 | |
# sites = man_sites_v1v3 | |
# title = 'most abundant Neisseria v1v3' | |
# labels = man_labels_v1v3 | |
# colors = man_colors_v1v3 | |
# cluster_site = 'SUBP' | |
# metric = 'horn' | |
# method = 'ward' | |
# get genus | |
df_sub_genus <- df[df$oligo %in% oligo, ] | |
df_sub_genus <- df_sub_genus[df_sub_genus$site %in% sites, ] | |
# get clustering based on SITE | |
df_sub_genus_sub_site <- df_sub_genus[df_sub_genus$site == cluster_site, ] | |
sub_matrix <- acast(df_sub_genus_sub_site, sample~oligo, value.var="abundance", fill = 0) | |
distance <- metric #"manhattan", "euclidean", "canberra", "bray", "kulczynski", "jaccard", "gower", "morisita", "horn", "mountford", "raup" , "binomial" or "chao" | |
d <- vegdist(sub_matrix, method=distance) | |
fit <- hclust(d, method=method) # "ward", "single", "complete", "average", "mcquitty", "median" or "centroid" | |
samples_order <- fit$labels[fit$order] | |
# set the order of samples based on clustering results: | |
df_sub_genus$sample <- factor(df_sub_genus$sample, levels=samples_order) | |
# I DON'T KNOW WHAT I'M DOING HERE, THERE ARE NA's INTRODUCED IN THE PREVIOUS LINE: | |
df_sub_genus <- df_sub_genus[!df_sub_genus$sample %in% c(NA), ] | |
# oh, very cool | |
df_sub_genus$oligo <- reorder(df_sub_genus$oligo, df_sub_genus$abundance, FUN=sum) | |
df_sub_genus$oligo <- factor(df_sub_genus$oligo, levels=levels(df_sub_genus$oligo)) | |
df_sub_genus <- df_sub_genus[order(df_sub_genus$oligo), ] | |
# replace oligos with labels in legend | |
if(!is.na(labels)){ | |
df_sub_genus$oligo <- factor(df_sub_genus$oligo, levels = oligo, labels = labels) | |
} | |
df_sub_genus <- df_sub_genus[with(df_sub_genus, order(site, oligo)), ] | |
df_sub_genus$site <- factor(df_sub_genus$site) | |
p <- ggplot(df_sub_genus, aes(x=factor(sample), y=abundance, labels, color, fill = oligo)) | |
p <- p + geom_bar(position="fill", stat = "identity", width=0.90) | |
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), axis.ticks.y = element_blank(), legend.position = 'right', panel.grid.major = element_blank(), panel.grid.minor = element_blank()) | |
p <- p + labs(title=title) | |
p <- p + labs(x='', y='') | |
p <- p + scale_y_continuous(breaks = NULL) | |
p <- p + facet_grid(site ~ .) | |
# color manually: | |
if(is.na(colors)){ | |
colors <- sample(rainbow(length(levels(factor(df_sub_genus$oligo))), s = 0.5, v = 0.7)) | |
} | |
p <- p + scale_fill_manual(limits = levels(factor(df_sub_genus$oligo)), values = colors) | |
print(p) | |
#plot.new() | |
#gl <- grid.layout(nrow=2, ncol=1) | |
#vp.1 <- viewport(layout.pos.col=1, layout.pos.row=1) | |
#vp.2 <- viewport(layout.pos.col=1, layout.pos.row=2) | |
#pushViewport(viewport(layout=gl)) | |
#pushViewport(vp.1) | |
#par(new=TRUE) | |
#plot(fit, cex = 0.7, sub = paste("Distance metric: ",distance,sep=""), xlab = '') # display dendogram | |
#popViewport() | |
#pushViewport(vp.2) | |
#print(p, newpage = FALSE) | |
#popViewport() | |
return(samples_order) | |
} | |
PLOT_OLIGO_PERCENT <- function(df, oligos, samples_order, site = 'KG', title = 'no title'){ | |
# this function plots the percentage of all reads combined in 'oligos' list to all reads | |
# observed in the 'site' | |
dfx <- data.frame(sample=character(), | |
site=character(), | |
abundance_of_oligos=numeric(), | |
abundance_of_all=numeric(), | |
p_abundance_of_oligos=numeric(), | |
stringsAsFactors=FALSE) | |
names(dfx) <- c('sample', 'site', 'abundance_of_oligos', 'abundance_of_all', 'p_abundance_of_oligos') | |
N <- 0 | |
dfx_sub_site <- df[df$site == site, ] | |
for(sample in samples_order){ | |
dfx_sample_reduced <- dfx_sub_site[dfx_sub_site$sample == sample, ] | |
dfx_oligo_reduced <- dfx_sample_reduced[dfx_sample_reduced$oligo %in% oligos, ] | |
abundance_of_all <- sum(dfx_sample_reduced$abundance) | |
abundance_of_oligos <- sum(dfx_oligo_reduced$abundance) | |
p_abundance_of_oligos <- abundance_of_oligos * 100.0 / abundance_of_all | |
N <- N + 1 | |
dfx[N, ] <- c(sample = sample, site=site, abundance_of_oligos = abundance_of_oligos, abundance_of_all = abundance_of_all, p_abundance_of_oligos = p_abundance_of_oligos) | |
} | |
dfx$p_abundance_of_oligos <- as.numeric(as.character(dfx$p_abundance_of_oligos)) | |
dfx$sample <- ordered(dfx$sample,levels=samples_order) | |
# prepare ggplot object | |
q <- ggplot(dfx, aes(x=factor(sample), y=p_abundance_of_oligos)) | |
q <- q + stat_summary(fun.y = sum, geom = "bar") | |
q <- q + theme(axis.text.x = element_text(angle = 90, size=10, vjust=0, face = 'bold'), legend.position = 'none', axis.ticks.x = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) | |
q <- q + labs(title=title) | |
q <- q + labs(x='', y=paste("Percentage in ", site, sep='')) | |
q <- q + scale_y_sqrt() | |
print(q) | |
} | |
DIST_BETWEEN_TWO_SEQS <- function(s1, s2){ | |
#s1 <- 'AAGGCCTACCAAGGCGACGATCAGTAGCGGGTCTGAGAGGATGATCCGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATTTTGGACAATGGGCGCAAGCCTGATCCAGCCATGCCGCGTGTCTGAAGAAGGCCTTCGGGTTGTAAAGGACTTTTGTCAGGGAAGAAAAGGCTGTTGCTAATATCAGCGGCTGATGACGGTACCTGAAGAATAAGCACCGGCTAACTACGTG' | |
#s2 <- 'AAGGCCTACCAAGGCGACGATCAGTAGCGGGTCTGAGAGGATGATCCGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATTTTGGACAATGGGCGCAAGCCTGATCCAGCCATGCCGCGTGTCTGAAGAAGGCCTTCGGGTTGTAAAGGACTTTTGTCAGGGAAGAAAAGGCTGTTGCTAATATCGACAGCTGATGACGGTACCTGAAGAATAAGCACCGGCTAACTACGTG' | |
#s1 <- 'ATAACG' | |
#s2 <- 'ACCG' | |
s1 <- gsub("-", "", s1) | |
s2 <- gsub("-", "", s2) | |
alignment <- pairwiseAlignment(s1, s2, gapOpening = -2, gapExtension = -8, scoreOnly = FALSE) | |
s1a <- as.character(pattern(alignment)) | |
s2a <- as.character(subject(alignment)) | |
mismatch_map <- strsplit(c(s1a, s2a), split= '') | |
num_mismatches <- length(which(mismatch_map[[1]] != mismatch_map[[2]])) | |
return(100 - (num_mismatches * 100 / nchar(s1a))) | |
} | |
OLIGO_DIST <- function(df, oligos, labels=c(), otu_limits=TRUE){ | |
#oligos <- strep_oligotypes_v3v5 | |
#df <- df_v3v5_seqs | |
#labels <- strep_labels_v3v5 | |
# df = df_v1v3_seqs | |
# oligos = man_oligos_v1v3 | |
# labels = man_labels_v1v3 | |
df_subset <- df[df$OLIGO %in% oligos, ] | |
dfx <- data.frame(OLIGO=character(), | |
REP_SEQ=character(), | |
stringsAsFactors=FALSE) | |
names(dfx) <- c('OLIGO', 'REP_SEQ') | |
N <- 0 | |
for(i in seq(1, length(oligos))){ | |
oligo <- oligos[i] | |
if(length(labels) > 0){ | |
label <- labels[i] | |
} else { | |
label <- oligos[i] | |
} | |
N <- N + 1 | |
dfx[N, ] <- c(OLIGO = label, REP_SEQ = as.character(df_subset[df_subset$OLIGO == oligo, ]$REP_SEQ)) | |
} | |
df_subset <- dfx | |
if(length(labels) > 0){ | |
df_subset$OLIGO <- ordered(df_subset$OLIGO,levels=labels) | |
} else { | |
df_subset$OLIGO <- ordered(df_subset$OLIGO,levels=oligos) | |
} | |
df_subset$OLIGO <- factor(df_subset$OLIGO) | |
num_oligos <- nrow(df_subset) | |
dist_mat <- matrix(nrow=num_oligos, ncol=num_oligos) | |
colnames(dist_mat) <- df_subset$OLIGO | |
rownames(dist_mat) <- df_subset$OLIGO | |
for(i in 1:num_oligos) { | |
for(j in 1:num_oligos){ | |
o1 <- df_subset$OLIGO[i] | |
o2 <- df_subset$OLIGO[j] | |
dist_mat[i,j] <- DIST_BETWEEN_TWO_SEQS(df_subset[o1, ]$REP_SEQ, df_subset[o2, ]$REP_SEQ) | |
} | |
} | |
updated_df <- melt(dist_mat) | |
names(updated_df) <- c('OLIGO1', 'OLIGO2', 'DIST') | |
# find the best order | |
d <- vegdist(dist_mat, method="canberra") | |
fit <- hclust(d, method="complete") #, "single", "complete", "average", "mcquitty", "median" or "centroid" | |
oligos_order <- fit$labels[fit$order] | |
# set the order of samples based on clustering results: | |
updated_df$OLIGO1 <- factor(updated_df$OLIGO1, levels=rev(oligos_order)) | |
updated_df$OLIGO2 <- factor(updated_df$OLIGO2, levels=oligos_order) | |
p <- ggplot(updated_df, aes(OLIGO1, OLIGO2, fill = DIST)) | |
p <- p + geom_tile() | |
if(otu_limits){ | |
p <- p + scale_fill_gradient2(low = "steelblue", high = "red", mid="white", midpoint=98.5, limits=c(97,100), na.value='#bbfdc6') | |
} else { | |
p <- p + scale_fill_gradient2(low = "steelblue", high = "red", mid="white") | |
} | |
p <- p + theme(axis.text.x = element_blank(), legend.position = 'right', axis.ticks.x = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) | |
print(p) | |
} | |
STANDARD_FIG <- function(df, oligos, sites, title, labels, colors, cluster_site = 'SUBP', metric='horn', method = 'ward'){ | |
PLOT_OLIGO_PER_SITE_STACKBAR(df, oligos, sites = sites, title = title, labels= labels, colors = colors) | |
samples_order <- PLOT_OLIGO_EVERYWHERE(df, oligos, cluster_site = cluster_site, sites = sites, method = method, title = title, labels = labels, colors = colors, metric = metric) | |
PLOT_OLIGO_PERCENT(df, oligos, samples_order, site = 'KG', title = paste(title, ' in KG', sep='')) | |
PLOT_OLIGO_PERCENT(df, oligos, samples_order, site = 'SUBP', title = paste(title, ' in SUBP', sep='')) | |
PLOT_OLIGO_PERCENT(df, oligos, samples_order, site = 'TD', title = paste(title, ' in TD', sep='')) | |
} | |
######################################################################################################## | |
# DATA DATA DATA DATA DATA DATA DATA DATA DATA DATA DATA DATA DATA DATA DATA DATA DATA DATA DATA DATA | |
######################################################################################################## | |
setwd("~/Dropbox/Paper_Drafts/OralMicrobiota/analysis-v3v5/") | |
df_v3v5 <- as.data.frame(read.csv('all.env', header=TRUE, sep="\t")) | |
df_v3v5_seqs <- as.data.frame(read.csv('all-seqs.env', header=TRUE, sep="\t")) | |
results_v3v5 <- read.table(file = 'results-expanded.txt', header = TRUE , sep = "\t" , quote = '"') | |
setwd("~/Dropbox/Paper_Drafts/OralMicrobiota/analysis-v1v3/") | |
df_v1v3 <- as.data.frame(read.csv('all.env', header=TRUE, sep="\t")) | |
df_v1v3_seqs <- as.data.frame(read.csv('all-seqs.env', header=TRUE, sep="\t")) | |
all_sites = c('BM', 'HP', 'KG', 'PT', 'SUBP', 'SUPP', 'SV', 'TD', 'TH', 'ST') | |
oral_sites = c('BM', 'HP', 'KG', 'PT', 'SUBP', 'SUPP', 'SV', 'TD', 'TH') | |
base_colors <- c('#FF0000', '#FFCC33', '#FFFF00', '#00FF00', '#009900', '#00FFFF', '#3399FF', '#0000CC', '#FF00FF', '#9933FF', '#009999', '#990099', '#999900', '#990000') | |
levels(df_v3v5$genus) | |
#"manhattan", "euclidean", "canberra", "bray", "kulczynski", "jaccard", "gower", "morisita", "horn", "mountford", "raup" , "binomial" or "chao" | |
# "ward", "single", "complete", "average", "mcquitty", "median" or "centroid" | |
#PLOT_SITE(df, c('Firmicutes'), 'SUBP', 'horn') | |
#PLOT_GENUS(df, 'Firmicutes', 'SUPP', 'horn') | |
######################################################################################################## | |
# FIGURES | |
######################################################################################################## | |
# FIGURE SCATTER V1V3 | |
results <- read.table(file = '~/Dropbox/Paper_Drafts/OralMicrobiota/analysis-v1v3/scatter-plot-data/final_results.txt', header = TRUE , sep = "\t" , quote = "", row.names=1) | |
results <- results[with(results, order(-OBSERVED_PERCENT_OF_PATIENTS_ORAL_CAVITY)), ] | |
min(results$BEST_HIT_ID) | |
pdf('~/scatterx-v1v3.pdf', width=32, height=16) | |
p <- ggplot(data=results, aes(x=MEAN_ABUNDANCE_IN_ORAL, y=OBSERVED_PERCENT_OF_PATIENTS_ORAL_CAVITY,color=PHYLUM, shape=PHYLUM, fill=PHYLUM)) | |
p <- p + geom_point(size=5) | |
p <- p + theme(axis.text.x = element_text(hjust = 1, vjust = 1/2), legend.position="right") | |
p <- p + scale_x_sqrt(breaks=c(0.01,0.1,1,2,4,6,8,10)) | |
p <- p + scale_y_continuous(breaks=c(10,20,30,40,50,60,70,80,90,100)) | |
p <- p + scale_colour_manual(values = c('#FF0000', '#FFCC33', '#444400', '#00FF00', '#009900', '#00DDDD', '#3399FF', '#0000CC', '#FF00FF')) | |
p <- p + scale_shape_manual(values=c(7, 9, 10, 12, 13, 14, 2, 6, 7)) | |
p <- p + theme_bw() | |
print(p) | |
dev.off() | |
# FIGURE SCATTER V1V3 | |
results <- read.table(file = '~/Dropbox/Paper_Drafts/OralMicrobiota/analysis-v3v5/scatter-plot-data/final_results.txt', header = TRUE , sep = "\t" , quote = "", row.names=1) | |
results <- results[with(results, order(-OBSERVED_PERCENT_OF_PATIENTS_ORAL_CAVITY)), ] | |
min(results$BEST_HIT_ID) | |
pdf('~/scatterx-v3v5.pdf', width=32, height=16) | |
p <- ggplot(data=results, aes(x=MEAN_ABUNDANCE_IN_ORAL, y=OBSERVED_PERCENT_OF_PATIENTS_ORAL_CAVITY,color=PHYLUM, shape=PHYLUM, fill=PHYLUM)) | |
p <- p + geom_point(size=5) | |
p <- p + theme(axis.text.x = element_text(hjust = 1, vjust = 1/2), legend.position="right") | |
p <- p + scale_x_sqrt(breaks=c(0.01,0.1,1,2,4,6,8,10)) | |
p <- p + scale_y_continuous(breaks=c(10,20,30,40,50,60,70,80,90,100)) | |
p <- p + scale_colour_manual(values = c('#FF0000', '#FFCC33', '#444400', '#00FF00', '#009900', '#00DDDD', '#3399FF', '#0000CC', '#FF00FF')) | |
p <- p + scale_shape_manual(values=c(7, 9, 10, 12, 13, 14, 2, 6, 7)) | |
p <- p + theme_bw() | |
print(p) | |
dev.off() | |
# FINAL STREP FIGURE WITH ALL SITES | |
final_strep_v3v5 <- c("Firmicutes_GGCTCATTACTTGGTTTCCCTTCTACGT", "Firmicutes_GGCTCATTACTTGGTTTCTCTTCTACGT", "Firmicutes_GGCTCATTGCTTGGTTTCCCTTCTACGT", "Firmicutes_GGTTCGTTATTTGATTCCCCTTCTACGT", "Firmicutes_GGTTCGTTGTTTGACTCCCCTTCTACGT", "Firmicutes_GGTTCATTGTTTGACTCCCCTTCTACGT", "Firmicutes_GGTTCATTGTTTGACTCCTCTTCTACGT", "Firmicutes_GGTTCGTTGTTTGACTTCCCTTCTACGT", "Firmicutes_GGTTCATTGCTTGGCTTCCCTTCTACGT", "Firmicutes_GGTTCGTTGCTTGGCTTCCCTTCTACGT", "Firmicutes_GGTTCGTTGCTTGGCTTCGCTACTACGT") | |
final_strep_labels <- c("S. mitis / oralis / perosis", "S. infantis", "S. sp. (055).", "S. salivarius / vestibularis", "S. anginosus", "S. parasanguinis / cristatus / australis / sinensis", "S. oligofermentans", "S. intermedius / constellatus", "S. sanguinis / agalactiae", "S. gordonii", "S. mutans") | |
final_strep_colors <- c("#FF0000", "#FFCC33", "#9933FF", "#FFFF00", "#009999", "#00FF00", "#009900", "#00FFFF", "#0000CC", "#3399FF", "#FF00FF") | |
pdf('~/final_strep_v3v5.pdf', width=12, height=6) | |
STANDARD_FIG(df=df_v3v5, | |
oligos= final_strep_v3v5, | |
sites = oral_sites, | |
title = 'Strep all sites', | |
labels = final_strep_labels, | |
colors = final_strep_colors, | |
cluster_site = 'SUBP', | |
metric = 'horn', | |
method = 'ward') | |
OLIGO_DIST(df_v3v5_seqs, final_strep_v3v5, final_strep_labels) | |
dev.off() | |
# FINAL FIGURE 4 | |
fuso_l = c("F. nucleatum ss animalis", "F. nucleatum ss vincentii I", "F. nucleatum ss vincentii II", "F. nucleatum ss polymorphum", "F. periodonticum 98.8%", "F. periodonticum") | |
fuso_o = c("Fusobacteria_ACTGTAGGGCTATGAGAT", "Fusobacteria_ACTGTCGGGCTATGAGAT", "Fusobacteria_ACTGTA-AACTATGAGAT", "Fusobacteria_ACTGTA-AACTATAAGAT", "Fusobacteria_ACTGTAAAGCTAT-AGAT", "Fusobacteria_ACTGTAAAGCTCTAAGAT") | |
fuso_c = c("#3399FF", "#00FF00", "#009900", "#00FFFF", "#FF0000", "#FFCC33") | |
porph_l = c("P. sp. (277, 278)", "P. catoniae 98.6%", "P. catoniae 97.2%", "P. catoniae 99.5%", "P. catoniae 98.6%", "P. endodontalis", "P. sp. 98.1% (279)", "P. sp. 98.6% v1 (279)", "P. sp. 99.1% (279)", "P. sp. 99.5% v1 (279)", "P. sp. 98.6% v2 (279)", "P. sp. 99.5% v2 (279)", "P. sp. (279)", "P. sp. 99.1% (279)", "P. gingivalis") | |
porph_o = c("Bacteroidetes_CAGCT-TAGAGGGTCCTCCCCC", "Bacteroidetes_CAGCT-TAGAAGGTCCGTCCCC", "Bacteroidetes_CCGCT-TAGGAGGTCCGTCCCC", "Bacteroidetes_CAGCT-TAGAAGGTCCATCCCC", "Bacteroidetes_CAGCTTATAAAAGTCTGTCCCC", "Bacteroidetes_CAGCCCATAGCGCTGCTGTCCC", "Bacteroidetes_CCGCT-TTAAGGTTACTCCGTC", "Bacteroidetes_CAGCTTATAAGGTTACCCCGTC", "Bacteroidetes_CAGCTTATAAGGTTACTCCGTC", "Bacteroidetes_CAGCT-TTAAGGTAACTCCGTC", "Bacteroidetes_CAGCT-TTAAGGCAGCTCCGTC", "Bacteroidetes_CAGCT-TTAAGGTTGCTCCGTC", "Bacteroidetes_CAGCT-TTAAGGTTACTCCGTC", "Bacteroidetes_CAGCT-TTAAGGCTGCTCCGTC", "Bacteroidetes_CAGCACAAAGGGAATTGCCGAC") | |
porph_c = c("#999999", "#9933FF", "#00FFFF", "#3399FF", "#0000CC", "#FF00FF", "#00FF00", "#009900", "#009999", "#990000", "#CC6633", "#FFFF00", "#FFCC33", "#FF0000", "#000000") | |
camp_l = c("C. showae 99.6%","C. showae 99.2%","C. gracilis","C. concisus 99.6% v1","C. concisus 99.2% v1","C. concicus 99.6% v2","C. concisus 99.2% v2","C. concisus") | |
camp_o = c("Epsilonproteobacteria_GTTAACGCACGTTG","Epsilonproteobacteria_GCTAACGCACGTTG","Epsilonproteobacteria_GCTGACACCCATTC","Epsilonproteobacteria_GTCAACGCACGTTC","Epsilonproteobacteria_GTCAACGCATACTC","Epsilonproteobacteria_GTCAACGCACACTC","Epsilonproteobacteria_GTCAACGCACACCC","Epsilonproteobacteria_GTCAACGCACATTC") | |
camp_c = c("#FF0000","#FFCC33","#FFFF00","#FF00FF","#3399FF","#00FF00","#009900","#00FFFF") | |
# FIGURE MOST ABUNDANT NEISSERIA FIGURE MOST ABUNDANT NEISSERIA FIGURE MOST ABUNDANT NEISSERIA FIGURE MOST ABUNDANT NEISSERIA FIGURE MOST ABUNDANT NEISSERIA FIGURE MOST ABUNDANT NEISSERIA | |
# V1V3 | |
man_labels_v1v3 = c('Neisseria elongata 99.6% (598)', | |
'Neisseria flavescens (610)', | |
'Neisseria flavescens 98.8% (610)', | |
'Neisseria flavescens 99.6% (610)', | |
'Neisseria oralis (016, 015, 014)', | |
'Neisseria polysaccharea / meningitidis (737, 669))', | |
'Neisseria sicca / mucosa / flava / oralis (764, 682, 609, 014)', | |
'Neisseria subflava (476)') | |
man_oligos_v1v3 = c('Betaproteobacteria_ATGCCGAGGCCGTTCAA', | |
'Betaproteobacteria_ATGCCCTGTTGACGCGA', | |
'Betaproteobacteria_ATGCCCTGCCGACGTGA', | |
'Betaproteobacteria_ATGCCCTGTCGACGCGA', | |
'Betaproteobacteria_ATGCCGCGTCATTCGGA', | |
'Betaproteobacteria_ATGCCCTGTTAGCGCGA', | |
'Betaproteobacteria_ATGCCGCGGCCCTTCGA', | |
'Betaproteobacteria_ATGCCATAGCCCTTCGA') | |
man_sites_v1v3 = c('KG', 'SUBP', 'PT') | |
man_colors_v1v3 <- base_colors[1:length(man_oligos_v1v3)] | |
pdf('~/man_v1v3.pdf', width=12, height=6) | |
STANDARD_FIG(df=df_v1v3, | |
oligos= man_oligos_v1v3, | |
sites = oral_sites, | |
title = 'most abundant Neisseria v1v3', | |
labels = man_labels_v1v3, | |
colors = man_colors_v1v3, | |
cluster_site = 'SUBP', | |
metric = 'horn', | |
method = 'ward') | |
OLIGO_DIST(df_v1v3_seqs, man_oligos_v1v3, man_labels_v1v3) | |
dev.off() | |
# FIGURE MOST ABUNDANT NEISSERIA FIGURE MOST ABUNDANT NEISSERIA FIGURE MOST ABUNDANT NEISSERIA FIGURE MOST ABUNDANT NEISSERIA FIGURE MOST ABUNDANT NEISSERIA FIGURE MOST ABUNDANT NEISSERIA | |
# V3V5 | |
man_oligos_v3v5 = c("Betaproteobacteria_GAATTTTA-CTCTATCT", | |
"Betaproteobacteria_GAACTTTA-TTCTATTT", | |
"Betaproteobacteria_GAACTTTA-TTTCATTT", | |
"Betaproteobacteria_GAACCTTA-CTCTAACT", | |
"Betaproteobacteria_GAACCATA-CTCTAACT") | |
man_labels_v3v5 = c("N. elongata (HOT-598)", | |
"N. flavescens / subflava (610, 476)", | |
"N. flavescens / subflava 99.2% (610, 476)", | |
"N. pharyngis (HOT-729)", | |
"N. sicca / mucosa / flava (764, 682, 609, 015)") | |
man_sites_v3v5 = c('KG', 'SUBP', 'PT') | |
man_colors_v3v5 <- base_colors[1:length(man_oligos_v3v5)] | |
pdf('~/man_v3v5.pdf', width=12, height=6) | |
STANDARD_FIG(df=df_v3v5, | |
oligos= man_oligos_v3v5, | |
sites = oral_sites, | |
title = 'most abundant Neisseria v3v5', | |
labels = man_labels_v3v5, | |
colors = man_colors_v3v5, | |
cluster_site = 'SUBP', | |
metric = 'horn', | |
method = 'ward') | |
OLIGO_DIST(df_v3v5_seqs, man_oligos_v3v5, man_labels_v3v5) | |
dev.off() | |
# FIGURE HOT-279 V1V3 HOT-279 V1V3 HOT-279 V1V3 HOT-279 V1V3 HOT-279 V1V3 HOT-279 V1V3 HOT-279 V1V3 HOT-279 V1V3 HOT-279 V1V3 HOT-279 V1V3 HOT-279 V1V3 HOT-279 V1V3 HOT-279 V1V3 | |
h2_labels_v1v3 = c("HOT 279 V1 100% ", | |
"HOT 279 V2 99.5%", | |
"HOT 279 V3 99.5%", | |
"HOT 279 V4 99.1%", | |
"HOT 279 V5 99.1%", | |
"HOT 279 V6 98.1%", | |
"HOT 279 V7 98.6%") | |
h2_oligos_v1v3 = c("Bacteroidetes_CAGCT-TTAAGGTTACTCCGTC", | |
"Bacteroidetes_CAGCT-TTAAGGTTGCTCCGTC", | |
"Bacteroidetes_CAGCT-TTAAGGTAACTCCGTC", | |
"Bacteroidetes_CAGCT-TTAAGGCTGCTCCGTC", | |
"Bacteroidetes_CAGCTTATAAGGTTACTCCGTC", | |
"Bacteroidetes_CAGCTTATAAGGTTACCCCGTC", | |
"Bacteroidetes_CCGCT-TTAAGGTTACTCCGTC") | |
h2_sites_v1v3 = c('KG', 'SUBP', 'PT') | |
h2_colors_v1v3 <- base_colors[1:length(h2_oligos_v1v3)] | |
pdf('~/h2_v1v3.pdf', width=12, height=6) | |
STANDARD_FIG(df=df_v1v3, | |
oligos= h2_oligos_v1v3, | |
sites = oral_sites, | |
title = 'HOT 279 v1v3', | |
labels = h2_labels_v1v3, | |
colors = h2_colors_v1v3, | |
cluster_site = 'SUBP', | |
metric = 'horn', | |
method = 'ward') | |
OLIGO_DIST(df_v1v3_seqs, h2_oligos_v1v3, h2_labels_v1v3) | |
dev.off() | |
# FIGURE HOT-279 V3V5 HOT-279 V3V5 HOT-279 V3V5 HOT-279 V3V5 HOT-279 V3V5 HOT-279 V3V5 HOT-279 V3V5 HOT-279 V3V5 HOT-279 V3V5 HOT-279 V3V5 HOT-279 V3V5 HOT-279 V3V5 | |
h2_oligos_v3v5 = c("Bacteroidetes_AGTATCAGAAGATA-TACAGTAGTTATG", "Bacteroidetes_GGTATCAGAAGATA-TACAGTAGTTATG") | |
h2_labels_v3v5 = c("HOT 279 V1 100%", "HOT 279 V2 98.3%") | |
h2_sites_v3v5 = c('KG', 'SUBP', 'PT') | |
h2_colors_v3v5 <- base_colors[1:length(h2_oligos_v3v5)] | |
pdf('~/h2_v3v5.pdf', width=12, height=6) | |
STANDARD_FIG(df=df_v3v5, | |
oligos= h2_oligos_v3v5, | |
sites = oral_sites, | |
title = 'HOT 279 v3v5', | |
labels = h2_labels_v3v5, | |
colors = h2_colors_v3v5, | |
cluster_site = 'KG', | |
metric = 'horn', | |
method = 'ward') | |
OLIGO_DIST(df_v3v5_seqs, h2_oligos_v3v5, h2_labels_v3v5) | |
dev.off() | |
# FIGURE Strep mitis variants V1V3 | |
sm_oligos_v1v3 = c("Firmicutes_-CTCAGTGTTGCGAGTGGTAGTTCATACTTCG-", | |
"Firmicutes_-CTTAGTGTTGCGAGTGGTAGTTCACACTTCG-", | |
"Firmicutes_-CTCAGTATTGCGAGTGGTAGTTCACACTTCG-", | |
"Firmicutes_-CTCAGTGTTGCGAGTGGTAGTTCACGCTTCG-", | |
"Firmicutes_-CTCAGTGTTGTGAGTGGTAGTTCACACTTCG-") | |
sm_labels_v1v3 = c("S. mitis V1 99.6%", | |
"S. mitis V2 99.6%", | |
"S. mitis V3 99.6%", | |
"S. mitis V4 99.6%", | |
"S. mitis V5 99.6%") | |
sm_sites_v1v3 = c('KG', 'SUBP', 'PT') | |
sm_colors_v1v3 <- base_colors[1:length(sm_oligos_v1v3)] | |
pdf('~/sm_v1v3.pdf', width=12, height=6) | |
STANDARD_FIG(df=df_v1v3, | |
oligos= sm_oligos_v1v3, | |
sites = oral_sites, | |
title = 'Strep mitis variants v1v3', | |
labels = sm_labels_v1v3, | |
colors = sm_colors_v1v3, | |
cluster_site = 'KG', | |
metric = 'horn', | |
method = 'ward') | |
OLIGO_DIST(df_v1v3_seqs, sm_oligos_v1v3, sm_labels_v1v3) | |
dev.off() | |
# FIGURE STREP OLIGOS V3V5 | |
strep_oligos_v3v5 = c('Firmicutes_GGCTCATTACTTGGTTTCCCTTCTACGT', 'Firmicutes_GGCTCATTACTTGGTTTCTCTTCTACGT', 'Firmicutes_GGTTCGTTATTTGATTCCCCTTCTACGT', 'Firmicutes_GGTTCATTGTTTGACTCCCCTTCTACGT', 'Firmicutes_GGTTCATTGCTTGGCTTCCCTTCTACGT', 'Firmicutes_GGTTCGTTGCTTGGCTTCCCTTCTACGT', 'Firmicutes_GGTTCGTTGTTTGACTTCCCTTCTACGT', 'Firmicutes_GGTTCGTTGCTTGGCTTCGCTACTACGT') | |
strep_labels_v3v5 = c('S.peroris (728) / S.oralis (707) / S. mitis (677, 398) / S. sp. (431, 423, 071)', 'S. infantis (638) / S. sp. (486, 074, 061, 058)', 'S. salivarius (755) / S. vestibularis (021)', 'S. sinensis (767) / S. parasanguinis I (721) / S. parasanguinis II (411) / S. cristatus (578) / S. australis(073) / S. sp. (069, 067)', 'S. sanguinis (758) / S. agalactiae (537)', 'S. gordonii (622) / S. sp. (056)', 'S. intermedius (644) / S. constellatus (576)', 'S. mutans (686)') | |
strep_sites_v3v5 = c('SUBP', 'SUPP', 'BM', 'HP', 'KG', 'PT', 'SV', 'TD', 'TH') | |
strep_colors_v3v5 <- base_colors[1:length(strep_oligos_v3v5)] | |
pdf('~/strep_oligos_v3v5.pdf', width=12, height=6) | |
STANDARD_FIG(df=df_v3v5, | |
oligos= strep_oligos_v3v5, | |
sites = oral_sites, | |
title = 'STREP SITE v3v5', | |
labels = strep_labels_v3v5, | |
colors = strep_colors_v3v5, | |
cluster_site = 'SUBP', | |
metric = 'horn', | |
method = 'ward') | |
OLIGO_DIST(df_v3v5_seqs, strep_oligos_v3v5, strep_labels_v3v5) | |
dev.off() | |
# FIGURE STREP OLIGOS V1V3 | |
strep_oligos_v1v3 = c('Firmicutes_-CTCAGTGTTGCGAGTGGTAGTTCACACTTCG-', | |
'Firmicutes_-CTCAATGTTGCGAGTGGTAGTTCACACTTCG-', | |
'Firmicutes_-CTCTGTCCTCCGAGTGGTAGTTCACACGTCG-', | |
'Firmicutes_-CTCAGTGTTGCGAGTGGTAGTTCACACATCG-', | |
'Firmicutes_-CTCAGTCCTGCGAGTGGTAGTTCACACATCG-', | |
'Firmicutes_-CTCAGTGCTGCGGGTGGTAGTTCACACTTCG-', | |
'Firmicutes_-CTCTGTATTGCGGGTGGTAGTTCACACTTCG-', | |
'Firmicutes_-CTCTGTGCTGCGAGTGGTAGTTCATACCACG-', | |
'Firmicutes_-GTTAGTATTCCGTGTGGTAGTTCACACGTCG-', | |
'Firmicutes_-CTCAGTCCTGCGAGTGGTAGTTCACGCTTCG-', | |
'Firmicutes_-CTCAATCCTGCGAGTGGTAGTTCACACATCG-', | |
'Firmicutes_-CTCTGTCCTGCGAGTGGTAGTTCACACTTCG-') | |
strep_labels_v1v3 = c('S. mitis I / mitis II / australis / pneumoniae', | |
'S. oralis / infantis / mitis bv II', | |
'S. salivarius / S. vestibularis', | |
'S. parasanguinis I', | |
'S. parasanguinis II', | |
'S. sanguinis', | |
'S. gordonii', | |
'S. intermedius', | |
'S. mutans', | |
'S. peroris', | |
'S. sp. (HOT-065)', | |
'S. sp. (HOT-056)') | |
strep_sites_v1v3 = c('SUBP', 'SUPP', 'BM', 'HP', 'KG', 'PT', 'SV', 'TD', 'TH') | |
strep_colors_v1v3 <- base_colors[1:length(strep_oligos_v1v3)] | |
pdf('~/strep_oligos_v1v3.pdf', width=12, height=6) | |
STANDARD_FIG(df=df_v1v3, | |
oligos= strep_oligos_v1v3, | |
sites = oral_sites, | |
title = 'STREP SITE v1v3', | |
labels = strep_labels_v1v3, | |
colors = strep_colors_v1v3, | |
cluster_site = 'SUBP', | |
metric = 'horn', | |
method = 'ward') | |
OLIGO_DIST(df_v1v3_seqs, strep_oligos_v1v3, strep_labels_v1v3) | |
dev.off() | |
# FIGURE 4 FIGURE 4 FIGURE 4 FIGURE 4 FIGURE 4 FIGURE 4 FIGURE 4 FIGURE 4 FIGURE 4 FIGURE 4 FIGURE 4 FIGURE 4 FIGURE 4 FIGURE 4 FIGURE 4 FIGURE 4 FIGURE 4 Simonsiella muelleri | |
samples_of_interest <- c('158216035', '159227541', '159470302', '160016515', '404239096', '432193348', '533247696', '604812005', '638754422', '737052003', '763880905', '764002286', '764224817', '764649650', '764892411', '765034022', '892969023') | |
simonsiella_oligos <- c('Betaproteobacteria_GAACTTAA-TGCTAATT', 'Betaproteobacteria_GAGCTTAA-TGCTAATT') | |
simonsiella_labels <- c('Simonsiella muelleri 98.7%', 'Simonsiella muelleri 98.7%') | |
simonsiella <- read.table(file = '~/Dropbox/Paper_Drafts/OralMicrobiota/analysis/Simonsiella.env', header = TRUE , sep = "\t" , quote = "") | |
simonsiella_matrix <- acast(simonsiella, SAMPLE~OLIGO, value.var="ABUNDANCE", fill = 0) | |
d <- vegdist(simonsiella_matrix, method='horn') | |
fit <- hclust(d, method='ward') # "ward", "single", "complete", "average", "mcquitty", "median" or "centroid" | |
samples_order <- fit$labels[fit$order] | |
simonsiella$SAMPLE <- ordered(simonsiella$SAMPLE,levels=samples_order) | |
pdf('~/simonsiella.pdf') | |
p <- ggplot(simonsiella, aes(x=factor(SAMPLE), y=ABUNDANCE, group=OLIGO, fill = OLIGO)) | |
p <- p + geom_bar(position="fill", stat = "identity", width=0.90, colour = 'black') | |
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), axis.ticks.y = element_blank(), legend.position = 'right', panel.grid.major = element_blank(), panel.grid.minor = element_blank()) | |
ccc <- sample(heat.colors(2)) | |
p <- p + scale_fill_manual(values = ccc) | |
print(p) | |
dev.off() | |
# FIGURE 2 FIGURE 2 FIGURE 2 FIGURE 2 FIGURE 2 FIGURE 2 FIGURE 2 FIGURE 2 FIGURE 2 FIGURE 2 | |
hot_279_oligos = c('Bacteroidetes_AGTATCAGAAGATA-TACAGTAGTTATG', 'Bacteroidetes_GGTATCAGAAGATA-TACAGTAGTTATG', 'Bacteroidetes_AGTATCAGATGATA-TACAGTAGTTATG', 'Bacteroidetes_AGTATCAGAAGATA-TACGGTAGTTATG') | |
hot_279_labels = c('HOT 279 V1 100%', 'HOT 279 V2 98.3%', 'HOT 279 V3 99.6%', 'HOT 279 V4 99.6%') | |
hot_279_title = 'Distribution of HOT 279 Variants' | |
hot_279_sites = c('KG', 'SUBP', 'PT') | |
#saveRDS(hot_279_colors, file="~/Dropbox/Paper_Drafts/OralMicrobiota/figures/hot_279_colors") | |
hot_279_colors <- readRDS("~/Dropbox/Paper_Drafts/OralMicrobiota/figures/hot_279_colors") | |
pdf('~/oral_stuff_hot_279.pdf', width=20, height=8) | |
PLOT_OLIGO_PER_SITE_STACKBAR(df_v3v5, hot_279_oligos, sites = strep_sites_v3v5, title = hot_279_labels, labels=hot_279_labels, colors = hot_279_colors) | |
samples_order <- PLOT_OLIGO_EVERYWHERE(df_v3v5, hot_279_oligos, cluster_site = 'KG', sites = hot_279_sites, method = 'complete', title = hot_279_title, labels = hot_279_labels, colors = hot_279_colors) | |
PLOT_OLIGO_PERCENT(df_v3v5, hot_279_oligos, samples_order, site = 'KG', title = paste(hot_279_title, ' in KG', sep='')) | |
PLOT_OLIGO_PERCENT(df_v3v5, hot_279_oligos, samples_order, site = 'SUBP', title = paste(hot_279_title, ' in SUBP', sep='')) | |
PLOT_OLIGO_PERCENT(df_v3v5, hot_279_oligos, samples_order, site = 'PT', title = paste(hot_279_title, ' in PT', sep='')) | |
dev.off() | |
# FIGURE 3 PARAINFLUENZA V3V5 | |
figure_3_oligos = rev(c('Gammaproteobacteria_ATAGAATTTGTTCTC', 'Gammaproteobacteria_ATAGAATTTGATCTC', 'Gammaproteobacteria_ATAAAGTTTGACCTC', 'Gammaproteobacteria_ATAGAGTTTGATCTC', 'Gammaproteobacteria_ATAGAGTTTGACCTC', 'Gammaproteobacteria_ACAGAATTTAATCTC', 'Gammaproteobacteria_ATAGAATATAATCTC', 'Gammaproteobacteria_ATAGAATTTAGTCTC', 'Gammaproteobacteria_ATAGAATTTAATCTC', 'Gammaproteobacteria_ATAGAGTATAATCTC', 'Gammaproteobacteria_ATAGGGTTTAACCTC', 'Gammaproteobacteria_AAAGAGTTTAACCTC', 'Gammaproteobacteria_ATAGAGTTTAATCTC', 'Gammaproteobacteria_ATAGAGTTTAACCTC')) | |
figure_3_labels = rev(c('T. aromaticivorans V5 98.7', 'T. aromaticivorans V4 99.2', 'T. aromaticivorans V3 99.6', 'T. aromaticivorans V2 99.6', 'T. aromaticivorans V1 100', 'H. parainfluenzae V9 98.7', 'H. parainfluenzae V8 98.7', 'H. parainfluenzae V7 98.7', 'H. parainfluenzae V6 99.2', 'H. parainfluenzae V5 99.2', 'H. parainfluenzae V4 99.6', 'H. parainfluenzae V3 99.6', 'H. parainfluenzae V2 99.6', 'H. parainfluenzae V1 100')) | |
figure_3_title = 'Distribution of T. aromaticivorans and H. parainfluenzae' | |
figure_3_sites = c( 'SUBP', 'SUPP','BM', 'HP', 'KG', 'PT', 'SV', 'TD', 'TH') | |
figure_3_colors <- base_colors[1:length(figure_3_oligos)] | |
pdf('~/terra-v3v5.pdf', width=12, height=6) | |
STANDARD_FIG(df=df_v3v5, | |
oligos= figure_3_oligos, | |
sites = oral_sites, | |
title = figure_3_title, | |
labels = figure_3_labels, | |
colors = figure_3_colors, | |
cluster_site = 'SUBP', | |
metric = 'horn', | |
method = 'ward') | |
OLIGO_DIST(df_v3v5_seqs, figure_3_oligos, figure_3_labels) | |
dev.off() | |
# FIGURE 5, SUBP - PT vs SUPP vs PT | |
results <- results_v3v5 | |
subp_preferred <- as.data.frame(read.csv('SUBP-preferred-taxa-v3v5.txt', header=TRUE, sep="\t")) | |
supp_preferred <- as.data.frame(read.csv('SUPP-preferred-taxa-v3v5.txt', header=TRUE, sep="\t")) | |
subp_associated <- results[results$OLIGO %in% subp_preferred$OLIGO & grepl("SUBP", results$ABUNDANT_IN) == 1, ] | |
supp_associated <- results[results$OLIGO %in% supp_preferred$OLIGO & grepl("SUPP", results$ABUNDANT_IN) == 1, ] | |
nrow(subp_associated) | |
nrow(supp_associated) | |
pdf("~/subp_pt.pdf", width=10, height=10) | |
ggplot(data=subp_associated, aes(x=SUBP, y=PT)) + geom_point(aes(size=SUBP)) + stat_smooth(method='lm', size=2) + scale_size(range = c(2, 10)) + theme_bw() + theme(legend.position="none") | |
ggplot(data=supp_associated, aes(x=SUPP, y=PT)) + geom_point(aes(size=SUPP)) + stat_smooth(method='lm', size=2) + scale_size(range = c(2, 10)) + theme_bw() + theme(legend.position="none") | |
dev.off() | |
subp_fit <- lm(subp_associated$SUBP ~ subp_associated$PT) | |
summary(subp_fit) | |
supp_fit <- lm(supp_associated$SUBP ~ supp_associated$PT) | |
summary(supp_fit) | |
# FIGURE Plaque oligos | |
# TAXA that is abundant in SUPP and SUBB, versus everywhere else with most differential mean abundance: | |
f7_title = "Oligotype Differentially Abundant in Plaque" | |
f7_labels = c('01 Corynebacterium matruchotii 98.6%', '02 Fusobacterium ss. nucleatum / polymorphum', '03 Streptococcus sanguinis / agalactiae', '04 Actinomyces oris / naeslundii', '05 Fusobacterium naviforme / vincentii', '06 Streptococcus infantis', '07 Streptococcus salivarius / vestibularis', '08 Fusobacterium periodonticum', '09 Neisseria flavescens / subflavai 97.9%', '10 Prevotella scopos / melaninogenica') | |
f7_oligos = c('Actinobacteria_ATGAGAATCCGT--TTTCG', 'Fusobacteria_TGTGGAACAATA', 'Firmicutes_GGTTCATTGCTTGGCTTCCCTTCTACGT', 'Actinobacteria_ACGAATGCGCCTCGCTCTG', 'Fusobacteria_TATGGAACAATA', 'Firmicutes_GGCTCATTACTTGGTTTCTCTTCTACGT', 'Firmicutes_GGTTCGTTATTTGATTCCCCTTCTACGT', 'Fusobacteria_CATGAAACATTA', 'Betaproteobacteria_GAACTTTA-TTCTATTT', 'Bacteroidetes_AGTGCA-TAACCT--GG-TATCATCTCG') | |
f7_colors = c('#00FF00', '#22FF22', '#44FF44', '#66FF66', '#88FF88', '#FF0000', '#FF2222', '#FF4444', '#FF6666', '#FF8888') | |
f7_colors = base_colors[1:10] | |
f7_sites = c( 'SUBP', 'SUPP','BM', 'HP', 'KG', 'PT', 'SV', 'TD', 'TH') | |
pdf('~/oral-figure-7-draft.pdf', width=32, height=16) | |
PLOT_OLIGO_PER_SITE_STACKBAR(df_v3v5, f7_oligos, sites = f7_sites, title = f7_title, labels=f7_labels, colors = f7_colors) | |
samples_order <- PLOT_OLIGO_EVERYWHERE(df_v3v5, f7_oligos, cluster_site = 'SUBP', sites = strep_sites_v3v5, method = 'complete', title = f7_title, labels = f7_labels, colors = f7_colors) | |
dev.off() | |
# FIGURE TONSILS PATHOGENS FIGURE TONSILS PATHOGENS FIGURE TONSILS PATHOGENS FIGURE TONSILS PATHOGENS FIGURE TONSILS PATHOGENS FIGURE TONSILS PATHOGENS FIGURE TONSILS PATHOGENS | |
t_pat_sites = c('PT', 'TD') | |
t_pat <- as.data.frame(read.csv('SUBP-preferred-taxa-v3v5.txt', header=TRUE, sep="\t")) | |
t_pat_title = 'Tonsil pathogens V1V3' | |
plots <- list() | |
for(i in seq(1:length(t_pat$OLIGO))){ | |
print(i) | |
plots[[i]] <- PLOT_OLIGO_PER_SITE_BOXES_PERCENTS(df_v3v5, t_pat$OLIGO[i], title=t_pat$NAME[i], sites = t_pat_sites) | |
} | |
pdf(paste('~/t_pat_v3v5_sqrt.pdf', sep=''), width=1.8 * length(plots), height=8) | |
multiplot(plotlist = plots, cols = length(plots)) | |
dev.off() | |
# FIGURE PORPHYROMONAS | |
por_labels = c("Porphyromonas catoniae (284,283,277,275)", | |
"Porphyromonas catoniae (284,283,277,275) 99.6%", | |
"Porphyromonas catoniae (284,283,277,275) 98.3%", | |
"Porphyromonas sp. (285)", | |
"Porphyromonas endodontalis (273) v1", | |
"Porphyromonas endodontalis (273) v2", | |
"Porphyromonas sp. (279)", | |
"Porphyromonas sp. (279) 99.6% v1", | |
"Porphyromonas sp. (279) 99.6% v2", | |
"Porphyromonas sp. (279) 98.3%", | |
"Porphyromonas gingivalis (619)", | |
"Porphyromonas gingivalis (619) 99.6%") | |
por_colors <- base_colors[1:length(por_oligos)] | |
por_oligos = c("Bacteroidetes_AATATCAGAAGATA-TACAGTAGTTATG", | |
"Bacteroidetes_AATATCAGACGATA-TACAGTAGTTATG", | |
"Bacteroidetes_GATATCAGAAGATA-TACAGTAGTTATG", | |
"Bacteroidetes_AGGATCGGAAAATA-TACAGTAGATTTG", | |
"Bacteroidetes_AGGATCGGAAAATA-TACTGTAGATTTG", | |
"Bacteroidetes_AGGATCGGAAAATA-TAGTGTAGATTTG", | |
"Bacteroidetes_AGTATCAGAAGATA-TACAGTAGTTATG", | |
"Bacteroidetes_AGTATCAGAAGATA-TACGGTAGTTATG", | |
"Bacteroidetes_AGTATCAGATGATA-TACAGTAGTTATG", | |
"Bacteroidetes_GGTATCAGAAGATA-TACAGTAGTTATG", | |
"Bacteroidetes_GGTATG-GAAGATA-TACCGTAGTTATG", | |
"Bacteroidetes_GGTATG-GAAGATA-TACCGTAATTATG") | |
pdf("~/ORAL_POR.pdf", width=12, height=6) | |
STANDARD_FIG(df=df_v3v5, | |
oligos= por_oligos, | |
labels = por_labels, | |
sites = oral_sites, | |
title = "Porphyromonas oligos", | |
colors = por_colors, | |
cluster_site = "KG", | |
metric = 'horn', | |
method = 'ward') | |
OLIGO_DIST(df_v3v5_seqs, por_oligos, por_labels) | |
dev.off() | |
######################################################################################################################## | |
######################################################################################################################## | |
# | |
# We wanted to see the distribution of oligos found in all genera that resulted in more than 2 oligos that are not ST specific. | |
# Genera of interest came from the GAST column in the results.txt using using this command. Then, we filtered out the ones that | |
# are specific to ST, and less than 2 oligos using this command: | |
# for genus in Prevotella Bacteroides Treponema Streptococcus Leptotrichia Capnocytophaga Haemophilus Fusobacterium \ | |
# Porphyromonas Johnsonella Actinomyces Neisseria Selenomonas Corynebacterium Bergeyella Veillonella \ | |
# Roseburia Parabacteroides Oscillibacter Oribacterium Campylobacter Barnesiella Alistipes Aggregatibacter \ | |
# Ruminococcus Rothia Propionibacterium Parasutterella Paraprevotella Moryella Megasphaera Kingella Atopobium \ | |
# Actinobacillus Tannerella Sutterella Subdoligranulum Peptococcus Paludibacter Moraxella Lachnospira Gemella \ | |
# Faecalibacterium Eubacterium Dialister Derxia Coprococcus Comamonas Catonella Butyrivibrio Blautia Actinobaculum | |
# do | |
# echo -n $genus "= " | |
# grep $genus results-expanded.txt | awk 'BEGIN{FS="\t"}{if($31 != "ST")print $1}' | awk '{printf("\"%s\", ", $1)}'; echo | |
# done | awk '{if(NF>3) print $0}' | |
# following assignments are based on the output the previous command generated. | |
# | |
######################################################################################################################## | |
######################################################################################################################## | |
genera = c("Prevotella", "Bacteroides", "Treponema", "Streptococcus", "Leptotrichia", "Capnocytophaga", "Haemophilus", | |
"Fusobacterium", "Porphyromonas", "Johnsonella", "Actinomyces", "Neisseria", "Selenomonas", "Corynebacterium", | |
"Bergeyella", "Veillonella", "Oribacterium", "Campylobacter", "Aggregatibacter", "Rothia", "Propionibacterium", | |
"Moryella", "Megasphaera", "Kingella", "Atopobium", "Actinobacillus", "Tannerella", "Peptococcus", "Paludibacter", | |
"Moraxella", "Lachnospira", "Gemella", "Eubacterium", "Derxia", "Comamonas", "Catonella", "Actinobaculum") | |
genera_oligos = list(c("Bacteroidetes_AGAGTTATGAC-CT-G--CGTTATCTCG", "Bacteroidetes_AGCGCA-GGGCCTT-G--CGCCGTCTCG", "Bacteroidetes_AGCGCCAGGGT-TT-T--GCCTATCTCG", "Bacteroidetes_AGCGCTAGAGCATT-AT-GTCGGTTTTG", "Bacteroidetes_AGCGCTAGGGCATT-AT-ATTGGTTTTG", "Bacteroidetes_AGCGCTAGGGCGTT-TA-GTCGGTTTTG", "Bacteroidetes_AGCGCTAGGGCTTT-AT-GTCGGTTTTG", "Bacteroidetes_AGCGCTAGGGCTTT-TA-GTCGGTTTTG", "Bacteroidetes_AGGGTTATGAC-CT-A--CGTTATCTCG", "Bacteroidetes_AGGGTTATGAC-CT-G--CGTTATCTCG", "Bacteroidetes_AGTACCATGACTTT-TA-GTTCGTCTCG", "Bacteroidetes_AGTGCA-GAATCCT-A--AATTATCTCG", "Bacteroidetes_AGTGCA-GAATCTT-A--AATTATCTCG", "Bacteroidetes_AGTGCA-GAATCTT-T--AATTATCTCG", "Bacteroidetes_AGTGCA-GGAC-CT-A--TATCATCTCG", "Bacteroidetes_AGTGCA-GGAC-CT-G--TATCATCTCG", "Bacteroidetes_AGTGCA-GGAC-GT-G--TATCATCTCG", "Bacteroidetes_AGTGCA-GGAC-TT-G--TATCATCTCG", "Bacteroidetes_AGTGCA-TAACCT--GG-TATCATCTCG", "Bacteroidetes_AGTGCA-TGAC-CT-G--TATCATCTCG", "Bacteroidetes_AGTGCA-TGAC-TT-G--TATCATCTCG", "Bacteroidetes_AGTGCA-TGGC-CT-G--TATCATCTCG", "Bacteroidetes_AGTGCC-TGAC-CT-A--TATCATCTCG", "Bacteroidetes_AGTGCC-TGAC-CT-G--TATCATCTCG", "Bacteroidetes_AGTGCCAGAAC-AT-TA-AGTCATCTCG", "Bacteroidetes_AGTGCCAGAAC-AT-TA-GGTCATCTCG", "Bacteroidetes_AGTGCCAGAGT-TT-TA-GTTTGTCTCG", "Bacteroidetes_AGTGCCAGAGT-TT-TT-AATGATCTCG", "Bacteroidetes_AGTGCCAGGGT-TT-T--AGTCATCTCG", "Bacteroidetes_AGTGCCAGGGT-TT-T--GCCTATCTCG", "Bacteroidetes_AGTGCCAGGGT-TT-T--GTCTATCTCG", "Bacteroidetes_AGTGCCAGGGTCTT-AT-GGTCGTCTCG", "Bacteroidetes_AGTGCCAGGGTCTT-GC-GGTCGTCTCG", "Bacteroidetes_AGTGCCAGGGTTGT-AT-GGTCGTCTCG", "Bacteroidetes_AGTGCCAGGGTTGT-GT-GGTCGTCTCG", "Bacteroidetes_AGTGCCAGGGTTTG-AT-GGTCGTCTCG", "Bacteroidetes_AGTGCCAGGGTTTT-GA-GGCCGTCTCG", "Bacteroidetes_AGTGCCAGGGTTTT-GT-GGTCGTCTCG", "Bacteroidetes_AGTGCCGGGGT-TA-T--GTCTATCTCG", "Bacteroidetes_AGTGCTTGGGTTAT-AA-AGTCATCTCG", "Bacteroidetes_AGTGCTTGGGTTTT-AA-AGTCATCTCG", "Bacteroidetes_AGTGCTTTGACTTT-AT-GGTCGTCTCG", "Bacteroidetes_AGTGCTTTGACTTTAAT-GGTCGTCTCG", "Bacteroidetes_AGTGCTTTGACTTTTAT-GGTCGTCTCG", "Bacteroidetes_AGTGTCAGAAC-TA-GA-AGTAATTTAG", "Bacteroidetes_AGTGTCAGAAC-TA-GA-TGTAATTTAG", "Bacteroidetes_AGTGTCAGGAC-TA-TA-TGTAGTTTTG", "Bacteroidetes_ATTGCTTGGGT--T-T--AGCCATCTCG", "Bacteroidetes_ATTGCTTGGGT-TT-T--AGCCATCTCG", "Bacteroidetes_CGGGTTTTGGC-CT-G--CGTTATCTCG", "Bacteroidetes_GGCGCCTGGGT-CT-T--GGTCGTCTCG", "Bacteroidetes_GGCGCCTGGGT-TT-C--GGTCGTCTCG", "Bacteroidetes_GGCGCCTGGGT-TT-G--GGTCGTCTCG", "Bacteroidetes_GGCGCCTGGGT-TT-T--GGTCGTCTCG", "Bacteroidetes_GGCGTTGTGTC-CC--T-CGCCATCTCG", "Bacteroidetes_GGCGTTTTGAC-CC-CT-CGCCATCTCG", "Bacteroidetes_GGCGTTTTGAC-CG-CT-CGCCATCTCG", "Bacteroidetes_GGCGTTTTGGC-CC-CT-CGCCATCTCG", "Bacteroidetes_GGCGTTTTGTC--G-CT-CGCCATCTCG", "Bacteroidetes_GGCGTTTTGTC-CC--T-CGCCATCTCG", "Bacteroidetes_GGTGCA-GGGC-TT-TT-ATTCGTCTCG", "Bacteroidetes_GGTGCT-GGGC-TT-TT-ATTCGTCTCG", "Bacteroidetes_TATGTCAGAACATA-CAATGTAATTTAG", "Bacteroidetes_TATGTCAGAACATA-GACAGTAATTTAG", "Bacteroidetes_TATGTCAGAACATA-TAATGTAATTTAG", "Bacteroidetes_TATGTCAGAACATA-TATTGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-CAATGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-CAGTGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-GAATGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-GACAGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-GAGTGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-GATTGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-TAATGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-TAGTGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-TATTGTAATTTAG"), | |
c("Bacteroidetes_AGTGTCAGAAC-TA-GA-AGTAATTTAG", "Bacteroidetes_AGTGTCAGAAC-TA-GA-TGTAATTTAG", "Bacteroidetes_TATGTCAGAACATA-CAATGTAATTTAG", "Bacteroidetes_TATGTCAGAACATA-GACAGTAATTTAG", "Bacteroidetes_TATGTCAGAACATA-TAATGTAATTTAG", "Bacteroidetes_TATGTCAGAACATA-TATTGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-CAATGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-CAGTGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-GAATGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-GACAGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-GAGTGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-GATTGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-TAATGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-TAGTGTAATTTAG", "Bacteroidetes_TGTGTCAGAACATA-TATTGTAATTTAG"), | |
c("Spirochaetes_CACACTAACCAAACC", "Spirochaetes_CGCACCACGTCAGCC", "Spirochaetes_CTTCTGACCTAAATC", "Spirochaetes_GACACTCGATAAACC", "Spirochaetes_GACACTCGCTAAGTC", "Spirochaetes_GACACTTGATAAACC", "Spirochaetes_GACAGTACGTAAATC", "Spirochaetes_GACCCCTGCCAAATC", "Spirochaetes_GACCCTAGCTAAATC", "Spirochaetes_GATACTCGATAAACC", "Spirochaetes_GGCACTCAATAGACC", "Spirochaetes_GGCACTCGATAAACC", "Spirochaetes_GGCACTCGATAGACC", "Spirochaetes_TTTACGACTTAATTC", "Spirochaetes_TTTATGAACTAAATC", "Spirochaetes_TTTATGAACTAAATT", "Spirochaetes_TTTATGAACTGAATC", "Spirochaetes_TTTATGAACTGAATT", "Spirochaetes_TTTATGAACTTAATC", "Spirochaetes_TTTATGACTTAAATC", "Spirochaetes_TTTCTGACTTAAATC", "Spirochaetes_TTTCTGCCCTAAATC", "Spirochaetes_TTTCTGCCTTAAATC", "Spirochaetes_TTTCTGGCTTAAATC", "Spirochaetes_TTTTTGACTTAAATC", "Spirochaetes_TTTTTGCCCTAACTC"), | |
c("Firmicutes_GGCTCATTACTTGGCTTCCCTTCTACGT", "Firmicutes_GGCTCATTACTTGGTTTCACTTCTACGT", "Firmicutes_GGCTCATTACTTGGTTTCCCTTCTACGT", "Firmicutes_GGCTCATTACTTGGTTTCCCTTCTGCGT", "Firmicutes_GGCTCATTACTTGGTTTCTCTTCTACGT", "Firmicutes_GGCTCATTACTTGGTTTCTCTTCTGCGT", "Firmicutes_GGCTCATTATTTGGTTTCCCTTCTACGT", "Firmicutes_GGCTCATTGCTTGGCTTCCCTTCTACGT", "Firmicutes_GGCTCATTGCTTGGTTTCCCTTCTACGT", "Firmicutes_GGCTCATTGCTTGGTTTCTCTTCTACGT", "Firmicutes_GGCTCATTGTTTGACTCCCCTTCTACGT", "Firmicutes_GGTACGTTATTTGATTCCCCTTCTACGT", "Firmicutes_GGTTCATTACTTGGTTTCCCTTCTACGT", "Firmicutes_GGTTCATTACTTGGTTTCTCTTCTACGT", "Firmicutes_GGTTCATTATTTGATTCCCCTTCTACGT", "Firmicutes_GGTTCATTGCTTGGCTTCCCTTCTACGT", "Firmicutes_GGTTCATTGTTTGACTCCCCTTCTACGT", "Firmicutes_GGTTCATTGTTTGACTCCTCTTCTACGT", "Firmicutes_GGTTCATTGTTTGATTCCCCTTCTACGT", "Firmicutes_GGTTCGTTATTTGATTCCCCTTCTACGT", "Firmicutes_GGTTCGTTATTTGATTCCTCTTCTACGT", "Firmicutes_GGTTCGTTGCTTGGCTTCCCTTCTACGT", "Firmicutes_GGTTCGTTGCTTGGCTTCGCTACTACGT", "Firmicutes_GGTTCGTTGTTTGACTCCCCTTCTACGT", "Firmicutes_GGTTCGTTGTTTGACTTCCCTTCTACGT", "Firmicutes_GGTTCGTTGTTTGATTCCCCTTCTACGT"), | |
c("Fusobacteria_ACAGGGCCTGGC", "Fusobacteria_ATAGGACCTGGC", "Fusobacteria_ATAGGGCCTGGC", "Fusobacteria_ATTAGGTTTAAT", "Fusobacteria_CAGGGATTCAGC", "Fusobacteria_CAGGGATTCTGC", "Fusobacteria_CCCGGGTTCTGC", "Fusobacteria_CCTAGGTTCTGC", "Fusobacteria_CCTGGGCCCAGC", "Fusobacteria_TAAGGATTTATA", "Fusobacteria_TAAGGGCCTGGC", "Fusobacteria_TAGAGGTTTAGC", "Fusobacteria_TATGGATTCTGC", "Fusobacteria_TATGGATTTTAT", "Fusobacteria_TCTGGGTTCAGC", "Fusobacteria_TGAGGACTTAGC", "Fusobacteria_TGAGGACTTGGC", "Fusobacteria_TGAGGGCTTAGC", "Fusobacteria_TGCGGATTCAGC", "Fusobacteria_TGCGGATTCTGC", "Fusobacteria_TGTGGATTCTGC", "Fusobacteria_TTAGGGCTTAGC", "Fusobacteria_TTTGGGCCCAGC", "Fusobacteria_TTTGGGCCCTGC", "Fusobacteria_TTTGGGCCTTGC", "Fusobacteria_TTTGGGTTCAGC"), | |
c("Bacteroidetes_AGAAATGGGTC-GT-AA-GCTGGATTCG", "Bacteroidetes_AGGAATAGGTC-GG-AA-ATTGGATTCG", "Bacteroidetes_AGGAGA-GGTC-GC-AA-GCTGGATTCG", "Bacteroidetes_AGGAGA-GGTCGAT-AA-GCTGGATTCG", "Bacteroidetes_AGGAGA-GGTCGAT-AA-GTTGGATTCG", "Bacteroidetes_AGGAGG-GGTC-GA-AA-TTTGGATTCG", "Bacteroidetes_AGGAGG-GGTC-GT-AA-TTTGGATTCG", "Bacteroidetes_AGTAAGGGGTC-GC-GA-TCTGGATTCG", "Bacteroidetes_AGTAAGGGGTC-GT-GA-TCTGGATTCG", "Bacteroidetes_AGTAATAGGTC-GA-AA-TTTGGATTCG", "Bacteroidetes_AGTAATAGGTC-GC-AA-TTTGGATTCG", "Bacteroidetes_AGTAATAGGTC-GC-GA-TTTGGATTCA", "Bacteroidetes_AGTAATAGGTC-GT-AA-ACTGGATTCA", "Bacteroidetes_AGTAATAGGTC-GT-AA-TCTGGATTCA", "Bacteroidetes_AGTAATAGGTC-TA-AG-ATTGGATTCA", "Bacteroidetes_AGTAATAGGTC-TT-AA-ACTGGATTCA", "Bacteroidetes_AGTAATAGGTCGT--AA-ACTGGATTCG", "Bacteroidetes_AGTAATAGGTCGTT-AA-ACTGGATTCG", "Bacteroidetes_AGTAATGGGTC-GC-GA-TTTGGATTCA", "Bacteroidetes_AGTAATGGGTC-GT-AA-TCTGGATTCA", "Bacteroidetes_AGTAATGGGTC-TA-AG-ATTGGATTCA", "Bacteroidetes_AGTAATTGGTC-GC-AA-TCTGGATTCG", "Bacteroidetes_AGTAATTGGTC-GG-AA-TCTGGATTCG", "Bacteroidetes_AGTAATTGGTC-GT-AA-TCTGGATTCG"), | |
c("Gammaproteobacteria_AAAGAGTTTAACCTC", "Gammaproteobacteria_ACAGAATTTAATCTC", "Gammaproteobacteria_ATAAAGTTTGACCTC", "Gammaproteobacteria_ATAGAATATAATCTC", "Gammaproteobacteria_ATAGAATTTAATCTC", "Gammaproteobacteria_ATAGAATTTAGTCTC", "Gammaproteobacteria_ATAGAATTTGATCTC", "Gammaproteobacteria_ATAGAATTTGTTCTC", "Gammaproteobacteria_ATAGAGTATAATCTC", "Gammaproteobacteria_ATAGAGTTTAACCTC", "Gammaproteobacteria_ATAGAGTTTAATCTC", "Gammaproteobacteria_ATAGAGTTTGACCTC", "Gammaproteobacteria_ATAGAGTTTGATCTC", "Gammaproteobacteria_ATAGGGTTTAACCTC", "Gammaproteobacteria_ATAGGGTTTAACTTC", "Gammaproteobacteria_ATAGGGTTTGACTTC", "Gammaproteobacteria_ATCGAGTTAAACTTC", "Gammaproteobacteria_ATCGGG-TTAACTTC", "Gammaproteobacteria_ATCGGGTTAAACTTC", "Gammaproteobacteria_ATCGGGTTAAATTTC", "Gammaproteobacteria_ATCGGGTTAATCTTC", "Gammaproteobacteria_ATCGGGTTAGACTTC", "Gammaproteobacteria_ATCGGGTTTAATCTC"), | |
c("Fusobacteria_AATGAAATAATA", "Fusobacteria_CATGAAACATTA", "Fusobacteria_CCTGAAATATTA", "Fusobacteria_GTTGAAATATTA", "Fusobacteria_TATGAAACATTA", "Fusobacteria_TATGAAATAATA", "Fusobacteria_TATGGAACAATA", "Fusobacteria_TATGGAATAATA", "Fusobacteria_TGTAAAACATTA", "Fusobacteria_TGTGAAACAATA", "Fusobacteria_TGTGAAACATTA", "Fusobacteria_TGTGAAATAATA", "Fusobacteria_TGTGGAACAATA"), | |
c("Bacteroidetes_AATATCAGAAGATA-TACAGTAGTTATG", "Bacteroidetes_AATATCAGACGATA-TACAGTAGTTATG", "Bacteroidetes_AGGATCGGAAAATA-TACAGTAGATTTG", "Bacteroidetes_AGGATCGGAAAATA-TACTGTAGATTTG", "Bacteroidetes_AGGATCGGAAAATA-TAGTGTAGATTTG", "Bacteroidetes_AGTATCAGAAGATA-TACAGTAGTTATG", "Bacteroidetes_AGTATCAGAAGATA-TACGGTAGTTATG", "Bacteroidetes_AGTATCAGATGATA-TACAGTAGTTATG", "Bacteroidetes_GATATCAGAAGATA-TACAGTAGTTATG", "Bacteroidetes_GGTATCAGAAGATA-TACAGTAGTTATG", "Bacteroidetes_GGTATG-GAAGATA-TACCGTAATTATG", "Bacteroidetes_GGTATG-GAAGATA-TACCGTAGTTATG"), | |
c("Firmicutes_AGAACATTTGCAGCATCCTCAATTTTAA", "Firmicutes_AGAACATTTGCCGCATCCTCAATTTTAA", "Firmicutes_AGAACATTTGCGGCATCCTCAATTTTAA", "Firmicutes_GAATCATTGA-G-TCTCCGCAATTTTAA", "Firmicutes_GGAACATCAG-GCTTTCCTCAATTTTAA", "Firmicutes_GGACCATCAGGTCCTTCCCCTTTTTTAA", "Firmicutes_GGACCATCGGATCCTTCCCCATTTTTAA", "Firmicutes_GGACCATCGGGACCTTCCCCATTTTTAA", "Firmicutes_GGACCATCGGGACCTTCCCCTTTTTTAA", "Firmicutes_GGACCATCGGGTCCTTCCCCATTTTTAA", "Firmicutes_GGACCATCGGGTCCTTCCCCTTTTTTAA", "Firmicutes_GGACCATCGGGTCTTTCCCCATTTTTAA", "Firmicutes_GGACCATCTGGTCCATCCCCTTTTTTAA", "Firmicutes_GGATCATCGAAACTTTCCTCAATTTTAA", "Firmicutes_GGATCATCGAGACTTTCCTCAATTTTAA", "Firmicutes_GGATCATCGGATTTTTCCCCTTTTTTAA"), | |
c("Actinobacteria_ACGAATGCCT-CG-TTCTG", "Actinobacteria_ACGAATGCGCCTCGCTCTG", "Actinobacteria_ACGAATGCGCCTCGTTCTG", "Actinobacteria_ACGAATGCTC-CG-GTTTG", "Actinobacteria_ACGAATGTCCACT-TTTTG", "Actinobacteria_ACGAATGTTC-CG-GTTTG", "Actinobacteria_ACGAGTGTGCCTCGTTCTG", "Actinobacteria_GCGAATGCGCCTCGCTCTG", "Actinobacteria_GCGAATGCGCCTCGTTCTG", "Actinobacteria_GCGAGTGTTCTCG-CCCTG", "Actinobacteria_GCGAGTGTTCTCG-CTCTG", "Actinobacteria_GTGAACGTCT-CA-ATTTT", "Actinobacteria_GTGAGTGTGT-CG-CTCTG"), | |
c("Betaproteobacteria_GAACCATA-CTCTAACT", "Betaproteobacteria_GAACCTTA-CTCTAACT", "Betaproteobacteria_GAACCTTA-TGCTATCT", "Betaproteobacteria_GAACTTAA-TGCTAATT", "Betaproteobacteria_GAACTTTA-TTCTATCT", "Betaproteobacteria_GAACTTTA-TTCTATTT", "Betaproteobacteria_GAACTTTA-TTTCATTT", "Betaproteobacteria_GAATTATA-TTCTATTT", "Betaproteobacteria_GAATTTAA-TGATAATT", "Betaproteobacteria_GAATTTTA-CTCTATCT", "Betaproteobacteria_GAATTTTA-TGCTAATT", "Betaproteobacteria_GAATTTTA-TTCTATCT", "Betaproteobacteria_GAGCCTTA-TTCTGACT", "Betaproteobacteria_GAGCTTAA-TGCTAATT", "Betaproteobacteria_GGGCCTTA-TTCTGACT"), | |
c("Firmicutes_AGAACGTGAGATCCTACCGTTATCGCGC", "Firmicutes_AGACCGTAAGATCCTCCCGTTATCGTAC", "Firmicutes_AGACCGTAAGATCCTTCCGTTATCGTAC", "Firmicutes_AGACCGTAAGATCCTTCCGTTATCGTAT", "Firmicutes_AGATCGTAGGATCCTCCCGTTATTGTAC", "Firmicutes_GGAACGTGAGATCCTACCGTTATCGCGC", "Firmicutes_GGAACGTGAGATCCTACCGTTATCGTAC", "Firmicutes_GGACCGTAAGATCCTCCCGTTATTGTAC", "Firmicutes_GGATCGTAAGATCCTCCCGTTATTGTAC"), | |
c("Actinobacteria_ATGAGAACAC-CA-CTTCG", "Actinobacteria_ATGAGAATCCGT--TTTCG", "Actinobacteria_ATGAGAATCCGT-TTTTCG", "Actinobacteria_ATGAGAATCT-TT-TTTCG", "Actinobacteria_ATGAGAATCTGT--TTTCG", "Actinobacteria_ATGAGTATGT-TT-AGTCG"), | |
c("Bacteroidetes_AATATCAGGTT-AT-AT-AATCGATAGG", "Bacteroidetes_AATATCAGGTT-GT-AA-AATCGATAGG", "Bacteroidetes_AATATCAGGTT-GT-AT-AATCGATAGG", "Bacteroidetes_AATATCAGGTT-TT-AG-AATCGATAGG", "Bacteroidetes_AATATCAGGTT-TT-AT-AATCGATAGG", "Bacteroidetes_AATATTTGGTT-TT-AG-TATCGATAGG"), | |
c("Bacteroidetes_GATGACAGACG-AA-TA-TGTCATTTCT", "Firmicutes_AAAACATAAGATCCTCCCGTTATCATAT", "Firmicutes_AAAACGTAAGATC-TTCCGTTACCACGT", "Firmicutes_AAAACGTAAGATCCTTCCGTTACCACGT", "Firmicutes_AAAACGTAAGATCCTTCCGTTACCGCGT", "Firmicutes_AGAACGTAAGATCCTTCCGTTACCGTGC", "Firmicutes_AGAACGTGAGATCCTACCGTTATCGCGC", "Firmicutes_AGACCGTAAGATCCTCCCGTTATCGTAC", "Firmicutes_AGACCGTAAGATCCTTCCGTTATCGTAC", "Firmicutes_AGACCGTAAGATCCTTCCGTTATCGTAT", "Firmicutes_AGATCGTAGGATCCTCCCGTTATTGTAC", "Firmicutes_GAATCGTAAGATCCTTCCTTTGCCGCGT", "Firmicutes_GGAACGTAAGATCCTTCCGTTACCACGT", "Firmicutes_GGAACGTAAGATCCTTCCGTTACCGCGT", "Firmicutes_GGAACGTGAGATCCTACCGTTATCGCGC", "Firmicutes_GGAACGTGAGATCCTACCGTTATCGTAC", "Firmicutes_GGACCATAAGATCCTCCCGTTACCGCGC", "Firmicutes_GGACCGTAAGATCCTCCCGTTATTGTAC", "Firmicutes_GGATCGTAAGATCCTCCCGTTATTGTAC"), | |
c("Firmicutes_AGAGCATCTCCGGGATCCACAATTTTAA", "Firmicutes_AGATCATCGGCACTCTCCTCAATTTTAA", "Firmicutes_AGATCATCGGCACTCTCCTCTATTTTAA", "Firmicutes_AGATCATCGGCACTTTCCTCAATTTTAA", "Firmicutes_AGATCATCGGCATTCTCCTCTATTTTAA"), | |
c("Epsilonproteobacteria_AAT", "Epsilonproteobacteria_ATT", "Epsilonproteobacteria_GAT", "Epsilonproteobacteria_GGC", "Epsilonproteobacteria_GTC", "Epsilonproteobacteria_TCA"), | |
c("Gammaproteobacteria_ATAGGGTTTGACTTC", "Gammaproteobacteria_ATCGAGTTTAACTTC", "Gammaproteobacteria_ATCGGG-TTAACTTC", "Gammaproteobacteria_ATCGGGCTTGACTAC", "Gammaproteobacteria_ATCGGGTTAGACTTC", "Gammaproteobacteria_ATCGGGTTTAACTAC", "Gammaproteobacteria_ATCGGGTTTGACTTC", "Gammaproteobacteria_ATCGTGTTTGACTTC", "Gammaproteobacteria_ATGGTGTTTGACTTC"), | |
c("Actinobacteria_ACGAAAACACACA-TTCTG", "Actinobacteria_ATAAGAACACACA-TTCTG", "Actinobacteria_ATGAGAACACACA-TTCTG"), | |
c("Actinobacteria_ATGAGTGCACACA-TCCTA", "Actinobacteria_ATGAGTGTACACA-TCCTA", "Actinobacteria_ATGAGTGTCCACA-GTCTA"), | |
c("Firmicutes_AGAACATTTGCAGCATCCTCAATTTTAA", "Firmicutes_AGAACATTTGCCGCATCCTCAATTTTAA", "Firmicutes_AGAACATTTGCGGCATCCTCAATTTTAA", "Firmicutes_GGATCATCGGATTTTTCCCCTTTTTTAA"), | |
c("Firmicutes_GGAACGTAAGATCCTTCCGTTACCACGT", "Firmicutes_GGAACGTAAGATCCTTCCGTTACCGCGT", "Firmicutes_GGACCATAAGATCCTCCCGTTACCGCGC"), | |
c("Betaproteobacteria_GAACTTTA-TTCTATCT", "Betaproteobacteria_GAATTATA-TTCTATTT", "Betaproteobacteria_GAATTTAA-TGATAATT", "Betaproteobacteria_GAATTTTA-TGCTAATT", "Betaproteobacteria_GAATTTTA-TTCTATCT"), | |
c("Actinobacteria_GTGAA-TCAA-A--CTCTG", "Actinobacteria_GTGAA-TCAG-A--CTCTG", "Actinobacteria_GTGAA-TTA--AG-CTCTG"), | |
c("Gammaproteobacteria_ACAGAATTTAATCTC", "Gammaproteobacteria_ATAGGGTTTAACTTC", "Gammaproteobacteria_ATAGGGTTTGACTTC", "Gammaproteobacteria_ATCGAGTTTAACTTC", "Gammaproteobacteria_ATCGGG-TTAACTTC", "Gammaproteobacteria_ATCGGGTTAAACTTC", "Gammaproteobacteria_ATCGGGTTAAATTTC", "Gammaproteobacteria_ATCGGGTTTAACTAC", "Gammaproteobacteria_ATCGGGTTTGACTTC"), | |
c("Bacteroidetes_AGTACTAGAAG-TA-TA-TGTAGTTATG", "Bacteroidetes_AGTATTAGAAG-TA-TA-TGTAGTTATG"), | |
c("Firmicutes_GACTCATCTTTTAAATCCGTAACTGTAC", "Firmicutes_GACTCATTTTTTAAATCCCTAACTGTAC"), | |
c("Bacteroidetes_AGTAACAGAAC-TA-TA-TGTTGATATA", "Bacteroidetes_AGTATCAGAACATA-TACTGTTGATATA"), | |
c("Gammaproteobacteria_TAAGGGCATGACGCA", "Gammaproteobacteria_TCAGGTTTTAAAGCA"), | |
c("Firmicutes_AAATCATTCAGAATGTCCGCTATCACGT", "Firmicutes_AGAACATTGAAATTTTCCACAATTTTAA", "Firmicutes_AGAACATTGAATCTTTCCCCATTTTTAA", "Firmicutes_AGAACATTGAATCTTTCCTCATTTTTAA", "Firmicutes_AGAACATTTGCAGCATCCTCAATTTTAA", "Firmicutes_AGAACATTTGCCGCATCCTCAATTTTAA", "Firmicutes_AGAACATTTGCGGCATCCTCAATTTTAA", "Firmicutes_AGAGCATCTCCGGGATCCACAATTTTAA", "Firmicutes_AGATCATCGGCACTCTCCTCAATTTTAA", "Firmicutes_AGATCATCGGCACTCTCCTCTATTTTAA", "Firmicutes_AGATCATCGGCACTTTCCTCAATTTTAA", "Firmicutes_AGATCATCGGCATTCTCCTCTATTTTAA", "Firmicutes_GAATCATCGA-G-TTTCCGCAATTTTAA", "Firmicutes_GAATCATTGA-G-TCTCCGCAATTTTAA", "Firmicutes_GGAACATCAG-GCTTTCCTCAATTTTAA", "Firmicutes_GGACCATCAGGTCCTTCCCCTTTTTTAA", "Firmicutes_GGACCATCGGATCCTTCCCCATTTTTAA", "Firmicutes_GGACCATCGGGACCTTCCCCATTTTTAA", "Firmicutes_GGACCATCGGGACCTTCCCCTTTTTTAA", "Firmicutes_GGACCATCGGGTCCTTCCCCATTTTTAA", "Firmicutes_GGACCATCGGGTCCTTCCCCTTTTTTAA", "Firmicutes_GGACCATCGGGTCTTTCCCCATTTTTAA", "Firmicutes_GGACCATCTGGTCCATCCCCTTTTTTAA", "Firmicutes_GGATCATCGAAACTTTCCTCAATTTTAA", "Firmicutes_GGATCATCGAGACTTTCCTCAATTTTAA", "Firmicutes_GGATCATCGGATTTTTCCCCTTTTTTAA"), | |
c("Firmicutes_GGCTCATTTC-AGGATCTCCTTCTACGT", "Firmicutes_GGCTCATTTC-AGGATCTTCTTCTACGT"), | |
c("Firmicutes_GAATCATCGA-G-TTTCCGCAATTTTAA", "Firmicutes_GAATCATCGG-GGCTTCCCTTTCTCATG", "Firmicutes_GAATCATTGA-G-TCTCCGCAATTTTAA", "Firmicutes_GGACCATCGGATCCTTCCCCATTTTTAA", "Firmicutes_GGACTATCGG-GGCTTCCTTTTCTCATG"), | |
c("Betaproteobacteria_AAACATAT--TCTGCAT", "Betaproteobacteria_GAACATAT--TCTGCAT"), | |
c("Betaproteobacteria_GTGCATC--TTTCAATT", "Betaproteobacteria_GTGCATC-TTTTCAATT"), | |
c("Firmicutes_AGAACATTGAATCTTTCCCCATTTTTAA", "Firmicutes_AGAACATTGAATCTTTCCTCATTTTTAA"), | |
c("Actinobacteria_ATGAGTGTTCTCA-ATTTT", "Actinobacteria_GTGAACGTCT-CA-ATTTT")) | |
for(i in seq(1, length(genera))){ | |
print(genera[i]) | |
genera_oligos_vector <- unlist(genera_oligos[i]) | |
sub_df <- df_v3v5[df_v3v5$oligo %in% genera_oligos_vector, ] | |
x = c() | |
counter <- 1 | |
for(site in oral_sites){ | |
x[counter] <- sum(sub_df[sub_df$site == site, ]$abundance) | |
counter = counter + 1 | |
} | |
pdf(paste("~/ORAL_", sprintf("%04d", i), "_", genera[i], ".pdf", sep = ""), width=12, height=6) | |
STANDARD_FIG(df=df_v3v5, | |
oligos= genera_oligos_vector, | |
labels = genera_oligos_vector, | |
sites = oral_sites, | |
title = genera[i], | |
colors = rainbow(length(genera_oligos_vector)), | |
cluster_site = oral_sites[x == max(x)], | |
metric = 'horn', | |
method = 'ward') | |
OLIGO_DIST(df_v3v5_seqs, genera_oligos_vector, genera_oligos_vector) | |
dev.off() | |
} | |
######################################################################################################################## | |
######################################################################################################################## | |
v1_v3_num_nucleotides_chosen_per_oligo <- c(21, 22, 17, 14, 33, 18, 13, 17, 15) | |
v3_v5_num_nucleotides_chosen_per_oligo <- c(19, 28, 17, 3, 28, 12, 15, 15) | |
mean(v1_v3_num_nucleotides_chosen_per_oligo) | |
mean(v3_v5_num_nucleotides_chosen_per_oligo) | |
######################################################################################################################## | |
######################################################################################################################## | |
all_oligos <- levels(df_v3v5$oligo) | |
pdf('~/ORAL_ALL.pdf', 32, height=16) | |
for(s in oral_sites){ | |
PLOT_OLIGO_EVERYWHERE(df_v3v5, | |
all_oligos, | |
cluster_site = s, | |
sites = oral_sites, | |
method = 'ward', | |
title = s, | |
labels = all_oligos, | |
colors = rainbow(length(all_oligos)), | |
metric = 'euclidean') | |
} | |
dev.off() | |
######################################################################################################################## | |
######################################################################################################################## |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment