Skip to content

Instantly share code, notes, and snippets.

@kippjohnson
Created October 9, 2018 17:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kippjohnson/cf781aca9f57fd4d07f582afcf805f10 to your computer and use it in GitHub Desktop.
Save kippjohnson/cf781aca9f57fd4d07f582afcf805f10 to your computer and use it in GitHub Desktop.
## Kipp Johnson
## Ugly code to find all potential merged rsIDs
rm(list=ls())
setwd('~/Dropbox/DudleyLab/chayakrit/pad_gwas/validation/')
## packages
library(data.table)
library(httr)
library(jsonlite)
library(stringr)
## Read in SNPlist
snp0 <- fread('63-snplist.txt')
snp0 <- snp0$SNP # character vector
np0 <- str_remove(snp0, "rs") ## Can't have rs string
## Will query NCBIVar restful api
baseurl <- 'https://api.ncbi.nlm.nih.gov/variation/v0/beta/refsnp/'
httr::set_config(httr::config(http_version = 0)) # some server problem
getMerged <- function(rsid_number, base=baseurl){
query <- paste0(base, rsid_number)
res1 <- GET(url=query)
con1 <- content(res1, "text")
json1 <- fromJSON(con1)
if(length(json1$dbsnp1_merges)>0){
merged_rsid <- json1$dbsnp1_merges[,'merged_rsid']
}else{
merged_rsid <- NA
}
## NCBI Var only allows 1 query/second
Sys.sleep(1.25)
#return(merged_rsid)
return.df <- matrix(NA, ncol=2, nrow=max(1, length(merged_rsid)))
return.df[,1] <- rep(paste0('rs',rsid_number))
if(any(is.na(merged_rsid))){
merged_rsid <- merged_rsid # do nothing
}else{
merged_rsid <- paste0('rs', merged_rsid)
}
return.df[,2] <- merged_rsid
colnames(return.df) <- c('rsID', 'merged.rsID')
return(return.df)
}
# test
getMerged(np0[1])
## Query for all of the SNPs
for(i in 1:length(np0)){
print(i)
if(i==1){
snps <- getMerged(np0[i])
}else{
try(
snps <- rbind(snps, getMerged(np0[i])) # horribly inefficient :/
)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment