Skip to content

Instantly share code, notes, and snippets.

@felipealbrecht
Last active June 17, 2018 09:40
Show Gist options
  • Save felipealbrecht/4c864984786aa62c4a0063d7ec424a1a to your computer and use it in GitHub Desktop.
Save felipealbrecht/4c864984786aa62c4a0063d7ec424a1a to your computer and use it in GitHub Desktop.
library(DeepBlueR)
library(gplots)
library(RColorBrewer)
library(matrixStats)
library(stringr)
#crest_data <- deepblue_list_experiments(project = "CREST", genome="grch38", epigenetic_mark = "DNA Methylation")
#crest_data <- crest_data[grep("_call", deepblue_extract_names(crest_data)),]
grid_experiments = c( "JKU004_call.wig",
"JKU001_call.wig",
"JKU002_call.wig",
"JKU003_call.wig",
"JKU015_call.wig",
"JKU008_call.wig",
"JKU007_call.wig",
"JKU006_call.wig",
"JKU005_call.wig",
"JKU011_call.wig",
"JKU009_call.wig",
"JKU013_call.wig",
"JKU014_call.wig" )
crest_data <- deepblue_name_to_id(grid_experiments, "experiments")
exp_columns <- deepblue_select_column(crest_data, "VALUE")
# http://www.ensembl.org/info/genome/funcgen/regulatory_build.html
blueprint_regulatory_regions <- deepblue_select_annotations(
annotation_name = "Blueprint Ensembl Regulatory Build",
genome = "GRCh38")
request_id <- deepblue_score_matrix(
experiments_columns = exp_columns,
aggregation_function = "mean",
aggregation_regions_id = blueprint_regulatory_regions)
score_matrix <- deepblue_download_request_data(request_id = request_id)
getPalette <- colorRampPalette(brewer.pal(9, "Set1"))
experiments_info <- deepblue_info(deepblue_extract_ids(crest_data))
biosource <- unlist(lapply(experiments_info, function(x){ x$sample_info$biosource_name}))
color_map <- data.frame(biosource = unique(biosource),
color = getPalette(length(unique(biosource))))
exp_names <- unlist(lapply(experiments_info, function(x){ x$name}))
biosource_colors <- data.frame(name = exp_names, biosource = biosource)
biosource_colors <- dplyr::left_join(biosource_colors, color_map, by = "biosource")
color_vector <- as.character(biosource_colors$color)
names(color_vector) <- biosource_colors$biosource
filtered_score_matrix <- as.matrix(score_matrix[,-c(1:3), with=FALSE])
head(filtered_score_matrix[,1:3])
message("regions before: ", nrow(filtered_score_matrix))
filtered_score_matrix <- filtered_score_matrix[which(complete.cases(filtered_score_matrix)),]
message("regions after: ", nrow(filtered_score_matrix))
message("regions before: ", nrow(filtered_score_matrix))
filtered_score_matrix_rowVars <- rowVars(filtered_score_matrix, na.rm = TRUE)
filtered_score_matrix <- filtered_score_matrix[which(filtered_score_matrix_rowVars > 0.10),]
message("regions after: ", nrow(filtered_score_matrix))
filtered_score_matrix <- filtered_score_matrix[,exp_names]
par(oma=c(10,4,4,2))
par(mar=c(10,4,4,2))
heatmap.2(filtered_score_matrix,
trace = "none", ColSideColors = color_vector,
hclust=function(x) hclust(x,method="complete"),
distfun=function(x) as.dist(1-cor(t(x), method = "pearson")),
Rowv = TRUE, dendrogram = "column",
key.xlab = "beta value", denscol = "black", keysize = 1.5)
legend("left",
legend = color_map$biosource,
col = as.character(color_map$color),
text.width = 0.25,
lty= 1,
lwd = 6,
cex = 0.7,
y.intersp = 0.7,
x.intersp = 0.7,
inset=c(0.0,0.0))
@mlist
Copy link

mlist commented May 18, 2017

#here you remove the genomic coordinates. BIG mistake :-D
filtered_score_matrix <- as.matrix(score_matrix[,-c(1:3), with=FALSE])
head(filtered_score_matrix[,1:3])

genomic_regions <- score_matrix[,1:3]
rownames(filtered_score_matrix) <- apply(score_matrix[,1:3], 1, paste)

message("regions before: ", nrow(filtered_score_matrix))
whichComplete <- which(complete.cases(filtered_score_matrix))
filtered_score_matrix <- filtered_score_matrix[whichComplete,]
genomic_regions <-  genomic_regions[whichComplete,]

message("regions after: ", nrow(filtered_score_matrix))


message("regions before: ", nrow(filtered_score_matrix))
filtered_score_matrix_rowVars <- rowVars(filtered_score_matrix, na.rm = TRUE)
whichVariable <- which(filtered_score_matrix_rowVars > 0.20)

filtered_score_matrix <- filtered_score_matrix[whichVariable,]
genomic_regions <- genomic_regions[whichVariable,]

#add row names
rownames(filtered_score_matrix) <- apply(genomic_regions, 1, function(x) paste(stringr::str_trim(x), collapse = ":"))
message("regions after: ", nrow(filtered_score_matrix))

filtered_score_matrix <- filtered_score_matrix[,exp_names]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment