Skip to content

Instantly share code, notes, and snippets.

@jokergoo
Created May 20, 2020 19:06
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jokergoo/0ea5639ee25a7edae3871ed8252924a1 to your computer and use it in GitHub Desktop.
Save jokergoo/0ea5639ee25a7edae3871ed8252924a1 to your computer and use it in GitHub Desktop.
type = c(rep("Tumor", 10), rep("Control", 10))
set.seed(888)
######################################
# generate methylation matrix
rand_meth = function(k, mean) {
(runif(k) - 0.5)*min(c(1-mean), mean) + mean
}
mean_meth = c(rand_meth(300, 0.3), rand_meth(700, 0.7))
mat_meth = as.data.frame(lapply(mean_meth, function(m) {
if(m < 0.3) {
c(rand_meth(10, m), rand_meth(10, m + 0.2))
} else if(m > 0.7) {
c(rand_meth(10, m), rand_meth(10, m - 0.2))
} else {
c(rand_meth(10, m), rand_meth(10, m + sample(c(1, -1), 1)*0.2))
}
}))
mat_meth = t(mat_meth)
rownames(mat_meth) = NULL
colnames(mat_meth) = paste0("sample", 1:20)
######################################
# generate directions for methylation
direction = rowMeans(mat_meth[, 1:10]) - rowMeans(mat_meth[, 11:20])
direction = ifelse(direction > 0, "hyper", "hypo")
#######################################
# generate expression matrix
mat_expr = t(apply(mat_meth, 1, function(x) {
x = x + rnorm(length(x), sd = (runif(1)-0.5)*0.4 + 0.5)
-scale(x)
}))
dimnames(mat_expr) = dimnames(mat_meth)
#############################################################
# matrix for correlation between methylation and expression
cor_pvalue = -log10(sapply(seq_len(nrow(mat_meth)), function(i) {
cor.test(mat_meth[i, ], mat_expr[i, ])$p.value
}))
#####################################################
# matrix for types of genes
gene_type = sample(c("protein_coding", "lincRNA", "microRNA", "psedo-gene", "others"),
nrow(mat_meth), replace = TRUE, prob = c(6, 1, 1, 1, 1))
#################################################
# annotation to genes
anno_gene = sapply(mean_meth, function(m) {
if(m > 0.6) {
if(runif(1) < 0.8) return("intragenic")
}
if(m < 0.4) {
if(runif(1) < 0.4) return("TSS")
}
return("intergenic")
})
############################################
# distance to genes
dist = sapply(mean_meth, function(m) {
if(m < 0.6) {
if(runif(1) < 0.8) return(round( (runif(1)-0.5)*1000000 + 500000 ))
}
if(m < 0.3) {
if(runif(1) < 0.4) return(round( (runif(1) - 0.5)*1000 + 500))
}
return(round( (runif(1) - 0.5)*100000 + 50000))
})
#######################################
# annotation to enhancers
rand_enhancer = function(m) {
if(m < 0.4) {
if(runif(1) < 0.6) return(runif(1))
} else if (runif(1) < 0.1) {
return(runif(1))
}
return(0)
}
anno_enhancer_1 = sapply(mean_meth, rand_enhancer)
anno_enhancer_2 = sapply(mean_meth, rand_enhancer)
anno_enhancer_3 = sapply(mean_meth, rand_enhancer)
anno_enhancer = data.frame(enhancer_1 = anno_enhancer_1, enhancer_2 = anno_enhancer_2, enhancer_3 = anno_enhancer_3)
#################################
# put everything into one object
res_list = list()
res_list$type = type
res_list$mat_meth = mat_meth
res_list$mat_expr = mat_expr
res_list$direction = direction
res_list$cor_pvalue = cor_pvalue
res_list$gene_type = gene_type
res_list$anno_gene = anno_gene
res_list$dist = dist
res_list$anno_enhancer = anno_enhancer
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment