Skip to content

Instantly share code, notes, and snippets.

@bschiffthaler
Created September 11, 2019 07:25
Show Gist options
  • Save bschiffthaler/d899730d23e3a32c194a1d52c92b8b2c to your computer and use it in GitHub Desktop.
Save bschiffthaler/d899730d23e3a32c194a1d52c92b8b2c to your computer and use it in GitHub Desktop.
Analyze your own SNP data from 23andMe using SNPedia
library(SNPediaR)
library(tidyverse)
# Original function has som issues with parsing that need fixing
getPages2 <- function (titles, verbose = FALSE, limit = 50, wikiParseFunction = identity,
baseURL, format, query, ...)
{
if (missing(baseURL))
baseURL <- "https://bots.snpedia.com/api.php"
if (missing(format))
format <- "format=json"
if (missing(query))
query <- "action=query&prop=revisions&rvprop=content&titles="
baseURL <- paste0(baseURL, "?", format, "&", query)
Np <- length(titles)
Cp <- 0
titles <- RCurl::curlEscape(titles)
n.batches <- 1 + length(titles)%/%limit
suppressWarnings(titles <- split(titles, 1:n.batches))
titles <- sapply(titles, paste, collapse = "|")
res <- list()
for (tit in titles) {
pagesURL <- paste0(baseURL, tit)
if (verbose) {
Cp <- Cp + limit
cat(format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "Downloading",
min(Cp, Np), "of", Np, "pages ...", pagesURL,
fill = TRUE)
}
curl_success = FALSE
while (! curl_success)
{
tryCatch({
datos <- RCurl::getURL(pagesURL)
datos <- gsub("\\n", "\\\\n", datos)
datos <- gsub("\\t", "\\\\t", datos)
datos <- gsub("\\\\\\\\x", "~x", datos)
datos <- gsub("\\\\x", "~x", datos)
datos <- gsub("\\| *", "\\|", datos)
datos <- gsub(" *= *", "=", datos)
datos <- gsub("w\\\\o", "without", datos)
datos <- jsonlite::fromJSON(datos)
datos <- datos[["query"]][["pages"]]
nombres <- sapply(datos, function(x) x[["title"]])
datos <- lapply(datos, function(x) x[["revisions"]][["*"]])
names(datos) <- nombres
datos <- lapply(datos, wikiParseFunction, ...)
res <- c(res, datos)
curl_success <- TRUE
}, finally = {})
}
}
return(res)
}
# Needed to orient some SNPs in accordance with SNPedia. 23andme reports all SNPs
# on the plus strand, but SNPedia reports some on minus.
complement <- c('A' = 'T', 'C' = 'G', 'G' = 'C', 'T' = 'A', 'D' = 'D', '-' = '-', 'I' = 'I')
make_complement <- function(x, o) {
sapply(seq_along(x), function(i){
y <- x[i]
if (o[i] == 'minus') {
paste(complement[unlist(str_split(y, ''))], collapse = '')
} else {
y
}
})
}
# Get all pages in SNPedia with a genotype entry (takes a while)
snpedia_gts <- SNPediaR::getCategoryElements('Is_a_genotype')
# Get all SNPedia snps that are relevant. We also need to get orientation info
# in case we need to complement
snpedia_snps <- SNPediaR::getCategoryElements('On_chip_23andMe_v5')
snpedia_snps <- intersect(unique(sapply(str_split(snpedia_gts, '\\('), '[[', 1)), snpedia_snps)
snpedia_snp_tags <- getPages2(snpedia_snps, verbose = TRUE)
orientation <- as_tibble(do.call(rbind, lapply(seq_along(snpedia_snp_tags), function(i) {
page <- snpedia_snp_tags[[i]]
name <- names(snpedia_snp_tags)[i]
c('name' = name, extractSnpTags(page, c('rsid', 'StabilizedOrientation')))
})))
# Load all my SNP data
my_snps <- read_tsv(file.path('~/Downloads/',
'genome_Bastian_Schiffthaler_v5_Full_20190326095221.txt'),
col_names = c("rsid", "chr", "pos", "gt"), comment = '#',
col_types = 'ccnc')
my_snps$orientation <- orientation$StabilizedOrientation[match(my_snps$rsid, str_to_lower(orientation$name))]
my_snps <- my_snps %>%
# Filter out SNPs without orientation info in SNPedia
filter(!is.na(orientation)) %>%
# Complement SNPs on minus strand
mutate(gt_s = make_complement(gt, orientation)) %>%
# Create genotype string to query SNPedia
mutate(geno = paste0(str_to_title(rsid), '(', str_sub(gt_s, 1, 1), ';', str_sub(gt_s, 2, 2), ')'))
# Filter such that we only have SNPs with genotype pages in SNPedia
my_snps_f <- my_snps %>%
filter(geno %in% snpedia_gts)
# Get page data
my_pages <- getPages(my_snps_f$geno, wikiParseFunction = extractGenotypeTags)
# Wrap into a tibble
my_gt_info <- as_tibble(do.call(rbind, my_pages))
my_gt_info %>% arrange(desc(magnitude)) %>% filter(repute == 'Good' & magnitude >= 1) -> x
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment