Skip to content

Instantly share code, notes, and snippets.

View hakyim's full-sized avatar

Hae Kyung Im hakyim

View GitHub Profile
@hakyim
hakyim / fn_ratxcan.R
Last active May 1, 2024 01:35
RatXcan functions
# calculate ptrs function
calc_ptrs <- function(expr, weights)
{
fast_generate_trait(expr, weights) %>% as.data.frame() %>% rownames_to_column(var = "IID") %>% tibble()
}
fast_generate_trait <- function(expr, weights){
# convert the predicted df into a matrix
if(!is.matrix(expr)) test <- expr %>% column_to_rownames(var = "IID") %>%
as.matrix() else test <- expr
## meta analysis, sample size based
metaZn = function(zdisc,zrepl,ndisc,nrepl)
{
## calculate meta analysis Zscore
## zdisc and zrepl are zscores in the discovery and replication sets respectively
## ndisc and nrepl are sample sizes in the discovery and replication sets
wdisc = sqrt(ndisc)
wrepl = sqrt(nrepl)
( zdisc * wdisc + zrepl * wrepl )/sqrt( wdisc^2 + wrepl^2 )
}
@hakyim
hakyim / qqunif.R
Last active February 2, 2024 22:29
qqunif
## pvalue vs uniform
qqunif =
function(p,BH=T,CI=T,mlog10_p_thres=30,...)
{
## thresholded by default at 1e-30
p=na.omit(p)
nn = length(p)
xx = -log10((1:nn)/(nn+1))
X = GXM
filename = paste0(data.dir,"tempo/gcta-run/gxm.grm")
ids = rnadata$sidno
write_GRMgz = function(X,filename,ids)
{
#X[upper.tri(X,diag=TRUE)]
rmat = row(X)
cmat = col(X)
omat = cbind(cmat[upper.tri(cmat,diag=TRUE)],rmat[upper.tri(rmat,diag=TRUE)],ncol(rnamat),X[upper.tri(X,diag=TRUE)])
write_tsv(data.frame(omat),path=filename,col_names = FALSE)
@hakyim
hakyim / miscel.R
Last active November 20, 2022 16:12
## loglik test
logLik.test = function(fit0,fit)
{
l0 = logLik(fit0)
l1 = logLik(fit)
lratio = abs(as.numeric(l0)-as.numeric(l1))
df = abs( attr(l1,'df') - attr(l0,'df') )
pval = 1-pchisq( 2*lratio, df)
return(pval)
}
@hakyim
hakyim / qqR2.R
Last active March 17, 2022 20:50
Plots observed R^2 vs expected R2 under null of 0 correlation
qqR2 <- function(corvec,nn,pad_neg_with_0 = FALSE,...)
{
## nn is the sample size, number of individuals used to compute correlation.
## needs correlation vector as input.
## nullcorvec generates a random sample from correlation distributions, under the null hypothesis of 0 correlation using Fisher's approximation.
if(pad_neg_with_0) corvec[corvec < 0 | is.na(corvec) ]=0
mm <- length(corvec)
nullcorvec = tanh(rnorm(mm)/sqrt(nn-3)) ## null correlation vector
qqplot(nullcorvec^2,corvec^2,...); abline(0,1); grid()
}
@hakyim
hakyim / eFDR.R
Last active January 21, 2021 23:49
eFDR: compute empirical FDR based on observed vector of p values and empirical null p values computed using simulated phenotype
## May 7, 2012
## Hae Kyung Im
## haky@uchicago.edu
##
## Please cite:
## Eric R Gamazon, R. Stephanie Huang, Eileen Dolan, Nancy Cox, and Hae Kyung Im, (2012)
## Integrative Genomics: Quantifying significance of phenotype-genotype
## relationships from multiple sources of high-throughput data
@hakyim
hakyim / my.scatter.rug.R
Last active January 21, 2021 23:49
scatter plot + rug + fivenum
require(ggplot2); require(grid)
my.scatter.rug = function(df,xname,yname,main='myplot',round.digits=2)
{ ## df is data frame, xname is name of the column to be ploted in x axis
## http://stackoverflow.com/questions/8545035/scatterplot-with-marginal-histograms-in-ggplot2
xy = mutate_(df, "x"=xname, "y"=yname)
pp <- ggplot(xy, aes(x, y)) +
# set the locations of the x-axis labels as Tukey's five numbers
scale_x_continuous(limit=c(min(xy$x), max(xy$x)),
breaks=round(fivenum(xy$x),round.digits)) +
@hakyim
hakyim / tileplot.R
Last active January 21, 2021 23:49
tile plot with var names
require(ggplot2)
tileplot <- function(mat)
{
mat = data.frame(mat)
mat$Var1 = factor(rownames(mat), levels=rownames(mat)) ## preserve rowname order
melted_mat <- gather(mat,key=Var2,value=value,-Var1)
melted_mat$Var2 = factor(melted_mat$Var2, levels=colnames(mat)) ## preserve colname order
rango = range(melted_mat$value)
pp <- ggplot(melted_mat,aes(x=Var1,y=Var2,fill=value)) + geom_tile() ##+scale_fill_gradientn(colours = c("#C00000", "#FF0000", "#FF8080", "#FFC0C0", "#FFFFFF", "#C0C0FF", "#8080FF", "#0000FF", "#0000C0"), limit = c(-1,1))
##install.packages("ggtern")
## library(ggtern)
## library(ggplot2)
plot_coloc_tern <- function(df) {
## function expects a data frame df with column names as follows
## coloc_p0 coloc_p1 coloc_p2 coloc_p3 coloc_p4
## 3.776851e-75 1.173353e-08 3.218853e-67 0.9999999326 5.563409e-08
## 2.020855e-107 5.216090e-01 1.633054e-107 0.4214557130 5.693525e-02