Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
Last active January 19, 2024 16:06
Show Gist options
  • Save explodecomputer/62ddac55a3f857b80ccee37f317f1871 to your computer and use it in GitHub Desktop.
Save explodecomputer/62ddac55a3f857b80ccee37f317f1871 to your computer and use it in GitHub Desktop.
example harmonisation script for multiple GWAS summary datasets
library(dplyr)
standardise <- function(d, ea_col="ea", oa_col="oa", beta_col="beta", eaf_col="eaf", chr_col="chr", pos_col="pos", vid_col="vid")
{
toflip <- d[[ea_col]] > d[[oa_col]]
d[[eaf_col]][toflip] <- 1 - d[[eaf_col]][toflip]
d[[beta_col]][toflip] <- d[[beta_col]][toflip] * -1
temp <- d[[oa_col]][toflip]
d[[oa_col]][toflip] <- d[[ea_col]][toflip]
d[[ea_col]][toflip] <- temp
d[[vid_col]] <- paste0(d[[chr_col]], ":", d[[pos_col]], "_", d[[ea_col]], "_", d[[oa_col]])
d
}
harmonise <- function(...)
{
l <- list(...)
# standardise each dataset and get new snp IDs
l <- lapply(l, standardise)
# get the SNPs in common across all datasets
# arrange to be in the same order
snpids <- Reduce(intersect, lapply(l, function(x) x$vid))
lapply(l, function(x) {
x %>%
filter(vid %in% snpids) %>%
arrange(chr, pos, ea, oa)
})
}
d1 <- tibble(
snp=paste0("rs", 1:4),
chr=10,
pos=1:4,
ea=c("C", "G", "A", "T"),
oa=c("G", "A", "T", "G"),
eaf=runif(4),
beta=rnorm(4)
)
d1
d2 <- tibble(
snp=paste0("rs", c(1:3, 3, 5)),
chr=10,
pos=c(1:3, 3,5),
ea=c("G", "G", "A", "A", "T"),
oa=c("C", "A", "T", "G", "G"), # different alleles at rs3
eaf=runif(5),
beta=rnorm(5)
)
d2
d3 <- tibble(
snp=paste0("rs", c(3,2,6)),
chr=10,
pos=c(3,2,6),
ea=c("A", "G", "T"),
oa=c("T", "A", "A"),
eaf=runif(3),
beta=rnorm(3)
)
d3
harmonise(d1, d2, d3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment