Last active
November 20, 2022 16:12
-
-
Save hakyim/fa55bc3b656f5273830afc4c7fe101a9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## loglik test | |
logLik.test = function(fit0,fit) | |
{ | |
l0 = logLik(fit0) | |
l1 = logLik(fit) | |
lratio = abs(as.numeric(l0)-as.numeric(l1)) | |
df = abs( attr(l1,'df') - attr(l0,'df') ) | |
pval = 1-pchisq( 2*lratio, df) | |
return(pval) | |
} | |
## inverse normalization | |
invnorm = function(x) { | |
if(is.null(dim(x))) res = invnorm.vector(x) else | |
res=apply(x,2,invnorm.vector) | |
res | |
} | |
invnorm.vector = function(x) {yy = rank(x)/(length(x)+1); qnorm(yy)} | |
## impute with column means | |
impute.col.means = function(df) | |
{ | |
meany = apply(df,2,mean,na.rm=TRUE) | |
for(col in 1:ncol(df)) df[,col][is.na(df[,col])] = meany[col] | |
df | |
} | |
## drop repeated vars but keeping first row for given var | |
## may be better to use top_n() | |
## example: widetempo %>% group_by(gene_id) %>% top_n(1,rank1) | |
## but when there are ties, top_n() will take all rows rather than 1 | |
droprep = function(df,repvar,sortvar,decreasing=FALSE) | |
{ | |
if(repvar %in% names(df)) | |
{ | |
if(sortvar %in% names(df)) | |
{ | |
tab = table(df[[repvar]]) | |
nametab = names(tab) | |
uniquelist = nametab[tab==1] | |
replist = nametab[tab>1] | |
ind = df[[repvar]] %in% uniquelist | |
res = df[ind,] | |
tempo = df[!ind,] | |
tempo = tempo[order(tempo[[sortvar]],decreasing=decreasing),] | |
for(vv in replist) | |
{ | |
rowa = tempo[tempo[[repvar]]==vv,][1,] | |
res = rbind(res,rowa) | |
} | |
return(res) | |
} else {warning(sortvar %&% ' (sortvar) not in data frame')} | |
} else {warning(repvar %&% ' (repvar) not in data frame')} | |
} | |
## HLA genes | |
## HLA Diversity in the 1000 Genomes Dataset | |
## http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0097282 | |
## Table S1: MHC region definitions | |
## MHC Sub-region Number of variants % of variants SNPs Gene Position | |
## From to From to From To | |
## Ext_CLASS_I 11,180 11% rs3130838 rs1122947 TRIM27 MOG 28,866,528 29,638,434 | |
## CLASS_I 42,718 42% rs375984 rs3131630 ZFP57 MICB 29,644,502 31,485,354 | |
## CLASS_III 8,033 8% rs4959079 rs416352 MCCD1 NOTCH4 31,488,879 32,207,393 | |
## CLASS_II 29,906 29% rs424232 rs3129223 AK123889 HLA-DPB2 32,208,324 33,113,197 | |
## Ext_CLASS_II 9,403 10% rs1003979 rs1547668 COL11A2 MLN 33,114,171 33,775,446 | |
## Total 103,310 100% DB snp UCSC UCSC HG19/Build37 | |
# hla.genelist = geneannot %>% filter(chr=='6') %>% filter(start > 28866528 & end < 33775446) %>% .[["ensgene"]] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment