Skip to content

Instantly share code, notes, and snippets.

@phydev
Created April 24, 2023 21:41
Show Gist options
  • Save phydev/151a7ec36fbac3158655d6f1b81eb6d8 to your computer and use it in GitHub Desktop.
Save phydev/151a7ec36fbac3158655d6f1b81eb6d8 to your computer and use it in GitHub Desktop.
simulation of phenotypic models based on gene expression data
# simulation of models based on gene expression data
# we create two different datasets drawn from normal distributions
# with different mean and standard deviation
# we then normalize the data using quantile normalization
# and fit a logistic regression model with glmnet
# load libraries
library(dplyr)
library(glmnet)
library(preprocessCore)
# set seed
set.seed(123)
# define number of genes and samples
n_genes <- 5
n_samples <- 100
# generate development data
df_train <- data.frame(matrix(rnorm(n_genes*n_samples), nrow=n_samples, ncol=n_genes))
# transpose data
df_train <- t(df_train)
# generate validation data
df_val <- data.frame(matrix(rnorm(n_genes*n_samples, mean=100, sd=10), nrow=n_samples, ncol=n_genes))
# transpose data
df_val <- t(df_val)
# quantile normalization with preprocessCore
df_train_norm <- as.data.frame(normalize.quantiles(as.matrix(df_train)))
rownames(df_train_norm) <- rownames(df_train)
df_val_norm <- as.data.frame(normalize.quantiles(as.matrix(df_val)))
rownames(df_val_norm) <- rownames(df_val)
# tidy data
df_train_norm <- df_train_norm %>% t %>% as.data.frame
df_val_norm <- df_val_norm %>% t %>% as.data.frame
#hist(df_train_norm[,1])
# model definition
df_train_norm <- df_train_norm %>%
mutate(LP = 0.01 * X1 + 0.034 * X2 - 0.056 * X3 - 0.012 * X4 + 0.023 * X5 +
rnorm(nrow(df_train_norm), mean = 0, sd = 0.01))
df_val_norm <- df_val_norm %>%
mutate(LP = 0.01 * X1 + 0.034 * X2 - 0.056 * X3 - 0.012 * X4 + 0.023 * X5 +
rnorm(nrow(df_val_norm), mean = 0, sd = 0.01))
df_train_norm <- df_train_norm %>% mutate(y = ifelse(LP > median(LP), 1, 0))
df_val_norm <- df_val_norm %>% mutate(y = ifelse(LP > median(LP), 1, 0))
df_train_norm_scaled <- df_train_norm %>% mutate(across(-c(y, LP), scale))
df_val_norm_scaled <- df_val_norm %>% mutate(across(-c(y, LP), scale))
model <- glmnet(x = df_train_norm[, 1:5],
y = df_train_norm$y,
family = "binomial",
alpha = 0.5,
lambda = 0.1,
standardize = FALSE)
predict(model,
newx = as.matrix(df_train_norm[, 1:5]),
type = "class")
predict(model,
newx = as.matrix(df_val_norm[, 1:5]),
type = "class")
# load library to compute AUC
library(pROC)
auc(as.vector(df_val_norm$y), as.vector(predict(model,
newx = as.matrix(df_val_norm[, 1:5]),
type = "response")))
auc(as.vector(df_val_norm_scaled$y), as.vector(predict(model,
newx = as.matrix(df_val_norm_scaled[, 1:5]),
type = "response")))
df_val_norm
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment