Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

View hakyim's full-sized avatar

Hae Kyung Im hakyim

View GitHub Profile
@hakyim
hakyim / fn_ratxcan.R
Last active January 26, 2024 19:35
RatXcan functions
# calculate ptrs function
calc_ptrs <- function(expr, weights)
{
fast_generate_trait(expr, weights) %>% as.data.frame() %>% rownames_to_column(var = "IID") %>% tibble()
}
fast_generate_trait <- function(expr, weights){
# convert the predicted df into a matrix
if(!is.matrix(expr)) test <- expr %>% column_to_rownames(var = "IID") %>%
as.matrix() else test <- expr
@hakyim
hakyim / fastlm.R
Last active January 21, 2021 23:46
## Relatively fast linear regression
fastlm = function(xx,yy)
{
## compute betahat (regression coef) and pvalue with Ftest
## for now it does not take covariates
df1 = 2
df0 = 1
ind = !is.na(xx) & !is.na(yy)
X = GXM
filename = paste0(data.dir,"tempo/gcta-run/gxm.grm")
ids = rnadata$sidno
write_GRMgz = function(X,filename,ids)
{
#X[upper.tri(X,diag=TRUE)]
rmat = row(X)
cmat = col(X)
omat = cbind(cmat[upper.tri(cmat,diag=TRUE)],rmat[upper.tri(rmat,diag=TRUE)],ncol(rnamat),X[upper.tri(X,diag=TRUE)])
write_tsv(data.frame(omat),path=filename,col_names = FALSE)
library(pwr)
pwr.r.test(n=90,sig.level=0.05,power=0.8)
## approximate correlation power calculation (arctangh transformation)
##
## n = 90
## r = 0.2913625
## sig.level = 0.05
## power = 0.8
## alternative = two.sided
## add cytoband to gene
addband2gene = function(df,geneid = "ensembl_gene_id")
{
if(!exists("anno_gene"))
{
ensembl <- biomaRt::useMart(biomart="ENSEMBL_MART_ENSEMBL",dataset="hsapiens_gene_ensembl")
anno_gene <<- biomaRt::getBM(attributes = c("ensembl_gene_id","external_gene_name","chromosome_name","start_position","end_position","band", "gene_biotype"),mart=ensembl )
print("defined anno_gene")
}
if(geneid != "ensembl_gene_id") names(df)[names(df)==geneid] = "ensembl_gene_id" ## this is an ugly workaround - need to find a way to use rename_ for this but don't know how to specify a string instead of name in rename(geneid = "ensembl_gene_id")
##install.packages("ggtern")
## library(ggtern)
## library(ggplot2)
plot_coloc_tern <- function(df) {
## function expects a data frame df with column names as follows
## coloc_p0 coloc_p1 coloc_p2 coloc_p3 coloc_p4
## 3.776851e-75 1.173353e-08 3.218853e-67 0.9999999326 5.563409e-08
## 2.020855e-107 5.216090e-01 1.633054e-107 0.4214557130 5.693525e-02
## meta analysis, sample size based
metaZn = function(zdisc,zrepl,ndisc,nrepl)
{
## calculate meta analysis Zscore
## zdisc and zrepl are zscores in the discovery and replication sets respectively
## ndisc and nrepl are sample sizes in the discovery and replication sets
wdisc = sqrt(ndisc)
wrepl = sqrt(nrepl)
( zdisc * wdisc + zrepl * wrepl )/sqrt( wdisc^2 + wrepl^2 )
}
@hakyim
hakyim / miscel.R
Last active November 20, 2022 16:12
## loglik test
logLik.test = function(fit0,fit)
{
l0 = logLik(fit0)
l1 = logLik(fit)
lratio = abs(as.numeric(l0)-as.numeric(l1))
df = abs( attr(l1,'df') - attr(l0,'df') )
pval = 1-pchisq( 2*lratio, df)
return(pval)
}
@hakyim
hakyim / tileplot.R
Last active January 21, 2021 23:49
tile plot with var names
require(ggplot2)
tileplot <- function(mat)
{
mat = data.frame(mat)
mat$Var1 = factor(rownames(mat), levels=rownames(mat)) ## preserve rowname order
melted_mat <- gather(mat,key=Var2,value=value,-Var1)
melted_mat$Var2 = factor(melted_mat$Var2, levels=colnames(mat)) ## preserve colname order
rango = range(melted_mat$value)
pp <- ggplot(melted_mat,aes(x=Var1,y=Var2,fill=value)) + geom_tile() ##+scale_fill_gradientn(colours = c("#C00000", "#FF0000", "#FF8080", "#FFC0C0", "#FFFFFF", "#C0C0FF", "#8080FF", "#0000FF", "#0000C0"), limit = c(-1,1))
@hakyim
hakyim / my.scatter.rug.R
Last active January 21, 2021 23:49
scatter plot + rug + fivenum
require(ggplot2); require(grid)
my.scatter.rug = function(df,xname,yname,main='myplot',round.digits=2)
{ ## df is data frame, xname is name of the column to be ploted in x axis
## http://stackoverflow.com/questions/8545035/scatterplot-with-marginal-histograms-in-ggplot2
xy = mutate_(df, "x"=xname, "y"=yname)
pp <- ggplot(xy, aes(x, y)) +
# set the locations of the x-axis labels as Tukey's five numbers
scale_x_continuous(limit=c(min(xy$x), max(xy$x)),
breaks=round(fivenum(xy$x),round.digits)) +