Skip to content

Instantly share code, notes, and snippets.

@sinarueeger
Last active May 11, 2019 14:24
Show Gist options
  • Save sinarueeger/875bf0df47b1a389a04c476d807f3d29 to your computer and use it in GitHub Desktop.
Save sinarueeger/875bf0df47b1a389a04c476d807f3d29 to your computer and use it in GitHub Desktop.
overview of current rsnps functions

NCBI vs OPENSNP

Function design

  • what about searching by chromosome and position (chr, pos)?
  • some of the functions only work for a single SNP; extend them to accept a vector of SNPs (e.g. genotypes())
  • add a progress bar

Add SNP info

LD stuff

## allgensnp -----------------------------
## Attach rsnps before anything else: the functions exercised in this script
## are the rsnps functions under review, and without this call a fresh R
## session stops at the first one with "could not find function".
library(rsnps)

## Example rsIDs; SNP1-SNP3 are reused by the sections further down.
SNP1 <- "rs1528723" ## mine
SNP2 <- "rs7412" ## examples
SNP3 <- "rs143384"

## One allgensnp() call per example SNP, followed by a peek at each result.
dat1 <- allgensnp(snp = SNP1)
dat2 <- allgensnp(snp = SNP2)
dat3 <- allgensnp(snp = SNP3)
head(dat1)
head(dat2)
head(dat3)

## Output recorded when these notes were written:
# > table(dat2$local_genotype)
## CC CT TC -- TT 00 T
## 2807 356 66 43 19 16 1
## > head(dat3)
## name chromosome position name id genotype_id local_genotype
## 1 rs143384 20 35437976 R.M. Holston 22 8 AG
## 2 rs143384 20 35437976 Charles G. Sullivan 5326 3834 AA

## Improvements
## - better error output for allgensnp()
## - improve the documentation (e.g. what does 00 mean in the table above?)
## - column names are not described
## - unclear what "Curl options passed on to crul::HttpClient" means
## - "value" description missing (see fetch_genotypes)
## - the column "name" appears twice (see head(dat3) above)
## New features
## allphenotypes -----------------------------
## Call allphenotypes() with df = TRUE, then inspect the names, the first
## entries and the frequency of each `characteristic` value of the result.
dat <- allphenotypes(df = TRUE)
names(dat)
head(dat)
table(dat$characteristic)
## Improvements
## - the result is a list, not a data frame, even with df = TRUE
## - column names are not described
## - number_of_users != length(unique(dat$id))
## - explain the difference between phenotypes and allphenotypes
## - "value" description missing (see fetch_genotypes)
## - dat %>% dplyr::filter(characteristic == "Height")
## New Feature
## annotations -------------------------------
## Call annotations() for SNP3 once for each `output` value tried in these
## notes: "plos", "mendeley", "snpedia" and "metadata".
annotations(snp = SNP3, output = "plos")
annotations(snp = SNP3, output = "mendeley")
annotations(snp = SNP3, output = "snpedia")
annotations(snp = SNP3, output = "metadata")
## Improvements
## - "value" description missing (see fetch_genotypes)
## New Feature
## - add an eQTL feature
## - add the GWAS Catalog
## download_users ----------------------------
download_users(id = 33)
## wc -l ~/33.23andme.12.txt

## "Sullivan" is only a partial name. This call is known to fail with the
## error quoted under "Improvements", so it is wrapped in try(): the error is
## still reported, but it no longer aborts the rest of the script when the
## file is run with source().
try(download_users(id = "Sullivan"))

## Improvements
## - better error message for "Sullivan":
##   Error in strsplit(fileurl, "/")[[1]] : subscript out of bounds
## - "value" is not very specific, because it downloads any file
## fetch_genotypes ----------------------------
## Get the user listing, then hand the first row's `genotypes.download_url`
## to fetch_genotypes().
data <- users(df = TRUE)
head(data[[1]]) # users with links to genome data
## NOTE(review): `file` may be partially matching a longer formal argument
## name of fetch_genotypes() -- confirm against the rsnps signature.
mydata <- fetch_genotypes(
  url = data[[1]][1, "genotypes.download_url"],
  file = "~/myfile.txt"
)
mydata
system("wc -l ~/myfile.txt") # count the lines of ~/myfile.txt
## Improvements
## - explain the difference between download_users and fetch_genotypes
## genotypes ---------------------------------
genotypes(SNP3, userid = "1-20", df = TRUE)

## Passing more than one SNP is noted under "Improvements" as producing an
## unhelpful error. try() still reports that error, but keeps it from
## aborting the rest of the script when the file is run with source().
try(genotypes(c(SNP3, SNP1), userid = "1-20", df = TRUE))

## Recorded output of allgensnp() (dat3) and genotypes() for the same SNP:
## > head(dat3[,-4] %>% dplyr::arrange(id))
## name chromosome position id genotype_id local_genotype
## 1 rs143384 20 35437976 1 9 AG
## 2 rs143384 20 35437976 6 5 AG
## 3 rs143384 20 35437976 8 2 GG
## > genotypes(SNP3, userid='1-20', df=TRUE)
## snp_name snp_chromosome snp_position user_name user_id genotype_id genotype
## 1 rs143384 20 35437976 Bastian Greshake Tzovaras 1 9 AG
## 2 rs143384 20 35437976 Nash Parovoz 6 5 AG
## 3 rs143384 20 35437976 Samantha B. Clark 8 2 GG

## Improvements
## - explain the difference between genotypes and allgensnp
## - better error message for: genotypes(c(SNP3, SNP1), userid='1-20', df=TRUE)
## - "value" description missing (see fetch_genotypes)
## ncbi_snp_query -----------------------------
## assembly differs from 38 (see the pos.gr38 item under "Improvements")
ncbi_snp_query(c(SNP1, SNP2, SNP3))
ncbi_snp_query(SNP2)

## Output recorded for the three-SNP query:
# Query Chromosome Marker Class Gene Alleles Major Minor MAF BP AncestralAllele
# 1 rs1528723 8 rs1528723 snp UNC5D A/T A T 0.0942 35269868 A,A,A,A,A,A
# 2 rs7412 19 rs7412 snp UNC5D C/T C T 0.0751 35269868 C,C,C,C,C,C
# 3 rs143384 20 rs143384 snp UNC5D C/T C T 0.4389 35269868 C,C,C,C,C,C
## -- wrong gene returned (UNC5D for every SNP); expected:
#rs1528723 > UNC5D
#rs7412 > APOE
#rs143384 > GDF5
## THIS IS FIXED IN THE GH VERSION

## Improvements
## - explain the difference between ncbi_snp_query2 and ncbi_snp_query
## - add pos.gr38
## - "value" description missing (see fetch_genotypes)
## ncbi_snp_query2 -----------------------------
## Same two queries as in the ncbi_snp_query section: all three example SNPs,
## then SNP2 alone.
ncbi_snp_query2(c(SNP1, SNP2, SNP3))
ncbi_snp_query2(SNP2)
## Improvements
## - reports organism but no gene
## - "value" description missing (see fetch_genotypes)
## ncbi_snp_summary -----------------------------
## Call ncbi_snp_summary() with all three example SNPs at once.
ncbi_snp_summary(c(SNP1, SNP2, SNP3))
## Improvements
## - "value" description missing (see fetch_genotypes)
## - explain what the gene2 column (gene:number) is
## phenotypes -----------------------------------
## Call phenotypes() for the user id range "1-8" with df = TRUE.
phenotypes(userid = "1-8", df = TRUE)
## Improvements
## - explain phenotypes vs phenotypes_byid vs allphenotypes
## - "value" description missing (see fetch_genotypes)
## phenotypes_byid --------------------------------
## Call phenotypes_byid() for phenotype id 57, the only id tried in these notes.
phenotypes_byid(phenotypeid = 57)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment