Skip to content

Instantly share code, notes, and snippets.

@MatteoLacki
Created December 7, 2023 12:59
Show Gist options
  • Save MatteoLacki/21a831e00a8db206411e28efd30db0da to your computer and use it in GitHub Desktop.
Save MatteoLacki/21a831e00a8db206411e28efd30db0da to your computer and use it in GitHub Desktop.
test_downcentric_whatever.R
library("impute")
# simulating a matrix
peptideCnt = 1000
runsCnt = 6
expressions = matrix(
data=rnorm(peptideCnt*runsCnt, mean=1000, sd=400),
nrow=peptideCnt,
ncol=runsCnt
)
draw_missing_at_random_mask = function(expressions, prob = c(0.75, 0.25)) sample(
x = c(FALSE, TRUE),
size = nrow(expressions) * ncol(expressions),
prob = prob,
replace = TRUE
)
missing_at_random_mask = draw_missing_at_random_mask(expressions)
table(missing_at_random_mask)
expressions_with_mocked_NAs = expressions
expressions_with_mocked_NAs[missing_at_random_mask] = NA
expressions_with_mocked_NAs_imputed = impute.knn(expressions_with_mocked_NAs, k=10)$data
nrow(expressions_with_mocked_NAs_imputed)
ncol(expressions_with_mocked_NAs_imputed)
# values not needing imputation untouched? YES
all(expressions_with_mocked_NAs_imputed[!missing_at_random_mask] == expressions[!missing_at_random_mask])
# errors:
err = expressions_with_mocked_NAs_imputed[missing_at_random_mask] - expressions[missing_at_random_mask]
hist(err, n=100)
# then: comparison of 2 sample conditions.
cond_A = matrix(
data=rnorm(peptideCnt*runsCnt, mean=1000, sd=100),
nrow=peptideCnt,
ncol=runsCnt
)
cond_B = matrix(
data=rnorm(peptideCnt*runsCnt, mean=1200, sd=100),
nrow=peptideCnt,
ncol=runsCnt
)
# do t.tests
do_tests = function(cond_A, cond_B){
if( nrow(cond_A) == nrow(cond_B) ){
pvalues = numeric(nrow(cond_A))
for( i in 1:nrow(cond_A)){
test_result = t.test( cond_A[i,], cond_B[i,] )
pvalues[i] = test_result$p.value
}
}
pvalues
}
p_values = do_tests(cond_A, cond_B)
adj_p_values = p.adjust(p_values, method="fdr")
plot(p_values[p_values< 0.1], adj_p_values[p_values< 0.1], pch=".")
abline(coef = c(0,1))
abline(h=0.01)
abline(v=0.01)
all_discoveries_mask = adj_p_values < 0.01
cond_A_mocked_NAs = cond_A
cond_A_mock_mask = draw_missing_at_random_mask(cond_A)
cond_A_mocked_NAs[cond_A_mock_mask] = NA
cond_A_mocked_NAs_imputed = impute.knn(cond_A_mocked_NAs, k=10)$data
cond_B_mocked_NAs = cond_B
cond_B_mock_mask = draw_missing_at_random_mask(cond_B)
cond_B_mocked_NAs[cond_B_mock_mask] = NA
cond_B_mocked_NAs_imputed = impute.knn(cond_B_mocked_NAs, k=10)$data
imputed_p_values = do_tests(cond_A_mocked_NAs_imputed, cond_B_mocked_NAs_imputed)
adj_imputed_p_values = p.adjust(imputed_p_values, method="fdr")
all_imputed_discoveries_mask = adj_imputed_p_values < 0.01
table(all_discoveries_mask, all_imputed_discoveries_mask)
# should the values be imputed within groups? Which groups? Likely technical replicates.
# what is t.test doing with missing values as such?
?t.test
getOption("na.action") # so simply omits them
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment