Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
Created May 29, 2013 03:22
Show Gist options
  • Save explodecomputer/5667762 to your computer and use it in GitHub Desktop.
Save explodecomputer/5667762 to your computer and use it in GitHub Desktop.
Get rs IDs for list of SNPs
library(SNPlocs.Hsapiens.dbSNP.20120608)
library(plyr)
# Read in bim file
bimname <- "mnd_b37_flipped.bim"
bim <- read.table(bimname, colClasses=c("numeric", "character", "numeric", "numeric", "character", "character"))
bim$index <- 1:nrow(bim)
# For each chromosome download the SNP locations and match to bim file
a <- ddply(bim, .(V1), .progress="text", function(x)
{
x <- mutate(x)
chr <- paste("ch", x$V1[1], sep="")
snps <- getSNPlocs(chr)
snps <- subset(snps, loc %in% x$V4, select=-c(alleles_as_ambig))
snps$RefSNP_id <- paste("rs", snps$RefSNP_id, sep="")
snps <- subset(snps, !duplicated(loc))
snps <- subset(snps, !duplicated(RefSNP_id))
x <- merge(x, snps, by.x="V4", by.y="loc", all.x=T)
x <- x[order(x$index), ]
index <- !is.na(x$RefSNP_id)
x$V2[index] <- x$RefSNP_id[index]
x <- subset(x, select=c(V1, V2, V3, V4, V5, V6))
return(x)
})
# If there are duplicated SNPs for any reason then label them as _2, _3 etc
temp <- rle(a$V2)
temp2 <- paste0(rep(temp$values, times = temp$lengths), "_", unlist(lapply(temp$lengths, seq_len)))
temp2 <- gsub("_1", "", temp2)
a$V2 <- temp2
# Save file
write.table(a, file="mnd_b37_flipped.bim", row=F, col=F, qu=F)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment