## Scratch review of the rsnps functions: each section below exercises one
## function and records suggested improvements and feature ideas.
library(rsnps)

## allgensnp -----------------------------

## SNP identifiers reused by the later sections of this script.
SNP1 <- "rs1528723" ## mine
SNP2 <- "rs7412" ## examples
SNP3 <- "rs143384"

dat1 <- allgensnp(snp = SNP1)
dat2 <- allgensnp(snp = SNP2)
dat3 <- allgensnp(snp = SNP3)

head(dat1)
head(dat2)
head(dat3)

## > table(dat2$local_genotype)
## CC CT TC -- TT 00 T
## 2807 356 66 43 19 16 1

## > head(dat3)
## name chromosome position name id genotype_id local_genotype
## 1 rs143384 20 35437976 R.M. Holston 22 8 AG
## 2 rs143384 20 35437976 Charles G. Sullivan 5326 3834 AA

## Improvements
## - better error output for allgensnp()
## - improve documentation (what does 00 mean in the table above?)
## - column names not described
## - what is "Curl options passed on to crul::HttpClient"
## - "value" description missing (see fetch_genotypes)
## - "name" appears twice as a column

## New features

## allphenotypes -----------------------------

dat <- allphenotypes(df = TRUE)

names(dat)
head(dat)
table(dat$characteristic)

## Improvements
## - not a data frame, even with df = TRUE, but a list
## - column names not described
## - number_of_users != length(unique(dat$id))
## - explain: phenotypes vs allphenotypes
## - "value" description missing (see fetch_genotypes)
## - dat %>% dplyr::filter(characteristic == "Height")

## New Feature

## annotations -------------------------------

## Query each annotation source in turn for the same SNP.
annotations(snp = SNP3, output = "plos")
annotations(snp = SNP3, output = "mendeley")
annotations(snp = SNP3, output = "snpedia")
annotations(snp = SNP3, output = "metadata")

## Improvements
## - "value" description missing (see fetch_genotypes)

## New Feature
## - add feature eqtl
## - adding gwas catalog

## download_users ----------------------------

download_users(id = 33)
## wc -l ~/33.23andme.12.txt

## "Sullivan" is only a partial name. This call is known to fail (see the
## error recorded under Improvements), so it is wrapped in try() to report the
## error without aborting the rest of the script.
try(download_users(id = "Sullivan"))

## Improvements
## - better error message for sullivan: Error in strsplit(fileurl, "/")[[1]] : subscript out of bounds
## - value not very specific >> bc it downloads any file

## fetch_genotypes ----------------------------

## Local file the downloaded genotype data is written to.
genotype_file <- "~/myfile.txt"

data <- users(df = TRUE)
head(data[[1]]) # users with links to genome data

## Fetch the genotype file of the first listed user.
mydata <- fetch_genotypes(
  url = data[[1]][1, "genotypes.download_url"],
  file = genotype_file
)

mydata

## Count the lines of the downloaded file in R instead of shelling out to
## `wc -l`, which is not available on every platform.
print(length(readLines(genotype_file)))

## Improvements
## - difference between download_users and fetch_genotypes

## genotypes ---------------------------------

genotypes(SNP3, userid = "1-20", df = TRUE)

## Passing several SNPs is known to fail (see Improvements); try() reports the
## error without aborting the rest of the script.
try(genotypes(c(SNP3, SNP1), userid = "1-20", df = TRUE))

## Comparison of the allgensnp() result with the genotypes() result:

## > head(dat3[,-4] %>% dplyr::arrange(id))
## name chromosome position id genotype_id local_genotype
## 1 rs143384 20 35437976 1 9 AG
## 2 rs143384 20 35437976 6 5 AG
## 3 rs143384 20 35437976 8 2 GG

## > genotypes(SNP3, userid='1-20', df=TRUE)
## snp_name snp_chromosome snp_position user_name user_id genotype_id genotype
## 1 rs143384 20 35437976 Bastian Greshake Tzovaras 1 9 AG
## 2 rs143384 20 35437976 Nash Parovoz 6 5 AG
## 3 rs143384 20 35437976 Samantha B. Clark 8 2 GG

## Improvements
## - difference between genotypes and allgensnp
## - better error message for: genotypes(c(SNP3, SNP1), userid='1-20', df=TRUE)
## - "value" description missing (see fetch_genotypes)

## ncbi_snp_query -----------------------------

## have another assembly than 38
ncbi_snp_query(c(SNP1, SNP2, SNP3))
ncbi_snp_query(c(SNP2))

## Query Chromosome Marker Class Gene Alleles Major Minor MAF BP AncestralAllele
## 1 rs1528723 8 rs1528723 snp UNC5D A/T A T 0.0942 35269868 A,A,A,A,A,A
## 2 rs7412 19 rs7412 snp UNC5D C/T C T 0.0751 35269868 C,C,C,C,C,C
## 3 rs143384 20 rs143384 snp UNC5D C/T C T 0.4389 35269868 C,C,C,C,C,C

## -- wrong return of gene (every row above reports UNC5D); expected:
## rs1528723 > UNC5D
## rs7412 > APOE
## rs143384 > GDF5

## THIS IS FIXED IN THE GH VERSION

## Improvements
## - difference between ncbi_snp_query2 and ncbi_snp_query
## - add pos.gr38
## - "value" description missing (see fetch_genotypes)

## ncbi_snp_query2 -----------------------------

## Same inputs as the ncbi_snp_query() calls, for comparison.
ncbi_snp_query2(c(SNP1, SNP2, SNP3))
ncbi_snp_query2(c(SNP2))

## Improvements
## - no gene, but organism
## - "value" description missing (see fetch_genotypes)

## ncbi_snp_summary -----------------------------

ncbi_snp_summary(c(SNP1, SNP2, SNP3))

## Improvements
## - "value" description missing (see fetch_genotypes)
## - what is the gene2 column (gene:number)

## phenotypes -----------------------------------

phenotypes(userid = "1-8", df = TRUE)

## Improvements
## - phenotypes vs phenotypes_byid vs allphenotypes
## - "value" description missing (see fetch_genotypes)

## phenotypes_byid --------------------------------

phenotypes_byid(phenotypeid = 57)