Skip to content

Instantly share code, notes, and snippets.

@mikelove
Created June 14, 2022 18:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mikelove/3767dfdc7cddd3be075ebb7a34edee2b to your computer and use it in GitHub Desktop.
Save mikelove/3767dfdc7cddd3be075ebb7a34edee2b to your computer and use it in GitHub Desktop.
Testing MPRA sequences can be computed from ID
library(Biostrings)
dna <- readDNAStringSet("test_sequences.fasta")
ids <- names(dna)
library(tidyverse)
library(plyranges)
dat <- data.frame(ids)
into <- c("seqnames","start","end","pos","relpos",
"ref","alt","allele","rsid","RE")
dat <- dat %>%
separate(ids, sep="_", into=into, convert=TRUE)
gr <- dat %>%
as_granges()
library(BSgenome.Hsapiens.UCSC.hg38)
my_dna <- getSeq(Hsapiens, gr)
idx <- 1:44 # non halplotypes
ref_width <- nchar(dat$ref)
# check that reference allele sequence is as expected
ref_ss <- subseq(my_dna[idx],
as.numeric(dat$relpos[idx]),
as.numeric(dat$relpos[idx]) + ref_width[idx] - 1)
# check ref matches
all(ref_ss == DNAStringSet(dat$ref[idx]))
ir <- IRanges(as.numeric(dat$relpos[idx]),
as.numeric(dat$relpos[idx]) + ref_width[idx] - 1)
irl <- IRangesList(split(ir, seq_along(ir)))
names(irl) <- NULL
alt <- replaceAt(my_dna[idx],
irl,
CharacterList(split(dat$alt[idx], idx)))
plot(width(alt))
dat <- dat %>%
mutate(alt_idx = allele %in% c("alt","altL","altR"))
width(alt)[dat$alt_idx[idx]]
my_probe <- my_dna[idx]
my_probe[dat$alt_idx[idx]] <- alt[dat$alt_idx[idx]]
re_idx <- dat$RE[idx] != "noREsites"
re_at <- substr(dat$RE[idx][re_idx], 2,
nchar(dat$RE[idx][re_idx]) - 1)
re_value <- substr(dat$RE[idx][re_idx],
nchar(dat$RE[idx][re_idx]),
nchar(dat$RE[idx][re_idx]))
my_probe_fix <- my_probe
re_irl <- IRangesList(split(IRanges(as.numeric(re_at),width=1),
seq_along(re_at)))
names(re_irl) <- NULL
my_probe_fix[re_idx] <- replaceAt(
my_probe[re_idx],
re_irl,
CharacterList(split(re_value, seq_along(re_value))))
all(my_probe_fix == dna[idx])
####
dna[45:48]
gr[45:48]
dna[45] == my_dna[45]
last_three <- replaceAt(
my_dna[46:48],
IRangesList(IRanges(112,112),
IRanges(141,141),
IRanges(c(112,141),width=1)),
CharacterList("C","A",c("C","A")))
dna[46:48] == last_three
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment