Skip to content

Instantly share code, notes, and snippets.

@hakyim
Last active November 20, 2022 16:12
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hakyim/fa55bc3b656f5273830afc4c7fe101a9 to your computer and use it in GitHub Desktop.
Save hakyim/fa55bc3b656f5273830afc4c7fe101a9 to your computer and use it in GitHub Desktop.
## loglik test
logLik.test = function(fit0,fit)
{
l0 = logLik(fit0)
l1 = logLik(fit)
lratio = abs(as.numeric(l0)-as.numeric(l1))
df = abs( attr(l1,'df') - attr(l0,'df') )
pval = 1-pchisq( 2*lratio, df)
return(pval)
}
## inverse normalization
invnorm = function(x) {
if(is.null(dim(x))) res = invnorm.vector(x) else
res=apply(x,2,invnorm.vector)
res
}
invnorm.vector = function(x) {yy = rank(x)/(length(x)+1); qnorm(yy)}
## impute with column means
impute.col.means = function(df)
{
meany = apply(df,2,mean,na.rm=TRUE)
for(col in 1:ncol(df)) df[,col][is.na(df[,col])] = meany[col]
df
}
## drop repeated vars but keeping first row for given var
## may be better to use top_n()
## example: widetempo %>% group_by(gene_id) %>% top_n(1,rank1)
## but when there are ties, top_n() will take all rows rather than 1
droprep = function(df,repvar,sortvar,decreasing=FALSE)
{
if(repvar %in% names(df))
{
if(sortvar %in% names(df))
{
tab = table(df[[repvar]])
nametab = names(tab)
uniquelist = nametab[tab==1]
replist = nametab[tab>1]
ind = df[[repvar]] %in% uniquelist
res = df[ind,]
tempo = df[!ind,]
tempo = tempo[order(tempo[[sortvar]],decreasing=decreasing),]
for(vv in replist)
{
rowa = tempo[tempo[[repvar]]==vv,][1,]
res = rbind(res,rowa)
}
return(res)
} else {warning(sortvar %&% ' (sortvar) not in data frame')}
} else {warning(repvar %&% ' (repvar) not in data frame')}
}
## HLA genes
## HLA Diversity in the 1000 Genomes Dataset
## http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0097282
## Table S1: MHC region definitions
## MHC Sub-region Number of variants % of variants SNPs Gene Position
## From to From to From To
## Ext_CLASS_I 11,180 11% rs3130838 rs1122947 TRIM27 MOG 28,866,528 29,638,434
## CLASS_I 42,718 42% rs375984 rs3131630 ZFP57 MICB 29,644,502 31,485,354
## CLASS_III 8,033 8% rs4959079 rs416352 MCCD1 NOTCH4 31,488,879 32,207,393
## CLASS_II 29,906 29% rs424232 rs3129223 AK123889 HLA-DPB2 32,208,324 33,113,197
## Ext_CLASS_II 9,403 10% rs1003979 rs1547668 COL11A2 MLN 33,114,171 33,775,446
## Total 103,310 100% DB snp UCSC UCSC HG19/Build37
# hla.genelist = geneannot %>% filter(chr=='6') %>% filter(start > 28866528 & end < 33775446) %>% .[["ensgene"]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment