Skip to content

Instantly share code, notes, and snippets.

@hakyim
Last active November 20, 2023 21:21
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hakyim/1be711ef109a8cb35c46313a983080d5 to your computer and use it in GitHub Desktop.
Save hakyim/1be711ef109a8cb35c46313a983080d5 to your computer and use it in GitHub Desktop.
X = GXM
filename = paste0(data.dir,"tempo/gcta-run/gxm.grm")
ids = rnadata$sidno
write_GRMgz = function(X,filename,ids)
{
#X[upper.tri(X,diag=TRUE)]
rmat = row(X)
cmat = col(X)
omat = cbind(cmat[upper.tri(cmat,diag=TRUE)],rmat[upper.tri(rmat,diag=TRUE)],ncol(rnamat),X[upper.tri(X,diag=TRUE)])
write_tsv(data.frame(omat),path=filename,col_names = FALSE)
if(length(list.files(paste0(filename,".gz") ) )>0) system(paste0("rm ",filename,".gz"))
system(paste0("gzip ",filename))
write_tsv(data.frame(ids,ids),path=paste0(filename,".id"),col_names = FALSE)
}
write_GRMgz(GXM,filename,ids)
## function in OmicKriging modified from original fro GCTA documentation
read_GRMBin <-
function (prefix, size = 4)
{
sum_i <- function(i) {
return(sum(1:i))
}
BinFileName <- paste(prefix, ".bin", sep = "")
NFileName <- paste(prefix, ".N.bin", sep = "")
IDFileName <- paste(prefix, ".id", sep = "")
id <- read.table(IDFileName)
n <- dim(id)[1]
BinFile <- file(BinFileName, "rb")
grm <- readBin(BinFile, n = n * (n + 1)/2, what = numeric(0),
size = size)
NFile <- file(NFileName, "rb")
N <- readBin(NFile, n = 1, what = numeric(0), size = size)
i <- sapply(1:n, sum_i)
close(BinFile)
close(NFile)
diag.elem <- grm[i]
off.diag.elem <- grm[-i]
X <- diag(diag.elem)
X[upper.tri(X, diag = FALSE)] <- off.diag.elem
X <- X + t(X) - diag(diag(X))
rownames(X) <- id$V2
colnames(X) <- id$V2
return(X)
}
Another version of readGRMbin is here with slightly different output format https://ibg.colorado.edu/cdrom2022/day7/Answers/RR_WorkshopDay7.html
## this may be faster when the sample size is large. It uses the exact formula for sum_i rather than calculate with a for loop. But it didn't make a lot of difference when n~3100
read_GRMBin_opt <-
function (prefix, size = 4)
{
#sum_i <- function(i) {
# return(sum(1:i))
#}
BinFileName <- paste(prefix, ".bin", sep = "")
NFileName <- paste(prefix, ".N.bin", sep = "")
IDFileName <- paste(prefix, ".id", sep = "")
id <- read.table(IDFileName)
n <- dim(id)[1]
BinFile <- file(BinFileName, "rb")
grm <- readBin(BinFile, n = n * (n + 1)/2, what = numeric(0),
size = size)
NFile <- file(NFileName, "rb")
N <- readBin(NFile, n = 1, what = numeric(0), size = size)
#i <- sapply(1:n, sum_i)
nvec <- 1:n
i <- nvec * (nvec + 1)/2
close(BinFile)
close(NFile)
diag.elem <- grm[i]
off.diag.elem <- grm[-i]
X <- diag(diag.elem)
X[upper.tri(X, diag = FALSE)] <- off.diag.elem
X <- X + t(X) - diag(diag(X))
rownames(X) <- id$V2
colnames(X) <- id$V2
return(X)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment