Created
June 9, 2015 21:09
-
-
Save lzamparo/229949debeecaecbe673 to your computer and use it in GitHub Desktop.
fisher.test on subset & summarized data frames using dplyr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Here is what the data looks like: | |
> all_chrs_dt | |
lower upper signif_label signif_unlabel not_signif_label not_signif_unlabel test_name | |
1: 0 100000 1293 1121 1235366 224096 unnannotated to unnannotated | |
2: 100000 200000 465 1335 1244847 227812 unnannotated to unnannotated | |
3: 200000 300000 1296 2358 1244090 225916 unnannotated to unnannotated | |
4: 300000 400000 1245 1840 1242731 226852 unnannotated to unnannotated | |
5: 400000 500000 627 995 1241226 227550 unnannotated to unnannotated | |
--- | |
5056: 1500000 1600000 0 76 0 895216 DNase to DNase | |
5057: 1600000 1700000 0 107 2 893980 DNase to DNase | |
5058: 1700000 1800000 0 162 1 891953 DNase to DNase | |
5059: 1800000 1900000 0 98 1 890585 DNase to DNase | |
5060: 1900000 2000000 0 78 1 889103 DNase to DNase | |
chromosome | |
1: chr1 | |
2: chr1 | |
3: chr1 | |
4: chr1 | |
5: chr1 | |
--- | |
5056: chrX | |
5057: chrX | |
5058: chrX | |
5059: chrX | |
5060: chrX | |
# I'm trying to run fisher.test on each band of lower & upper, e.g. lower = 0 & upper = 100000. | |
# For a given value of upper, I can get the counts I need, summed per test_name (one row each), in a df like so: | |
> counts <- all_chrs_dt %>% group_by(test_name) %>% filter(upper == 100000) %>% summarise(all_significant_label = sum(signif_label), all_significant_unlabel = sum(signif_unlabel), all_not_significant_label = sum(not_signif_label), all_not_significant_unlabel = sum(not_signif_unlabel)) | |
> counts | |
Source: local data table [11 x 5] | |
test_name all_significant_label all_significant_unlabel all_not_significant_label | |
1 unnannotated to unnannotated 15506 10712 15203321 | |
2 unnannotated to CTCF 2879 23339 1245936 | |
3 unnannotated to DNase-K27ac 5448 20770 804284 | |
4 unnannotated to DNase 267 25951 113358 | |
5 CTCF to DNase-K27ac 711 25507 57430 | |
6 CTCF to CTCF 359 25859 41148 | |
7 DNase-K27ac to DNase-K27ac 967 25251 29548 | |
8 CTCF to DNase 32 26186 7094 | |
9 DNase to DNase-K27ac 40 26178 4456 | |
10 DNase-K27ac to DNase 2 26216 811 | |
11 DNase to DNase 7 26211 618 | |
Variables not shown: all_not_significant_unlabel (int) | |
# I'd like to apply fisher.test to each row of this df (one 2x2 contingency table per test_name), | |
# but I can't seem to form the contingency tables properly; both attempts below fail: | |
> counts <- all_chrs_dt %>% group_by(test_name) %>% filter(upper == 100000) %>% summarise(all_significant_label = sum(signif_label), all_significant_unlabel = sum(signif_unlabel), all_not_significant_label = sum(not_signif_label), all_not_significant_unlabel = sum(not_signif_unlabel)) %>% group_by(test_name) %>% fisher.test(matrix(c(all_significant_label,all_significant_unlabel,all_not_significant_label, all_not_significant_unlabel),nrow=2,ncol=2))[[p.value]] | |
Error in `[[.data.frame`(., fisher.test(matrix(c(all_significant_label, : | |
object 'p.value' not found | |
>counts <- all_chrs_dt %>% group_by(test_name) %>% filter(upper == 100000) %>% summarise(all_significant_label = sum(signif_label), all_significant_unlabel = sum(signif_unlabel), all_not_significant_label = sum(not_signif_label), all_not_significant_unlabel = sum(not_signif_unlabel)) %>% group_by(test_name) %>% fisher.test(matrix(c(all_significant_label,all_significant_unlabel,all_not_significant_label, all_not_significant_unlabel),nrow=2,ncol=2)) | |
Error in fisher.test(., matrix(c(all_significant_label, all_significant_unlabel, : | |
all entries of 'x' must be nonnegative and finite | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment