Skip to content

Instantly share code, notes, and snippets.

@k3yavi
Last active April 30, 2020 19:09
Show Gist options
  • Save k3yavi/76ee6343af833dd4ac95aaa94ef2fa55 to your computer and use it in GitHub Desktop.
Save k3yavi/76ee6343af833dd4ac95aaa94ef2fa55 to your computer and use it in GitHub Desktop.
generate intronic reference using eisaR
library(Biostrings)
library(BSgenome)
library(eisaR)
library(GenomicFeatures)
library(SummarizedExperiment)
library(tximeta)
library(rjson)
library(reticulate)
library(SingleCellExperiment)
library(scater)
## wget ftp://ftp.ensembl.org/pub/release-100/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
## wget ftp://ftp.ensembl.org/pub/release-100/gtf/homo_sapiens/Homo_sapiens.GRCh38.100.gtf.gz
## gzip Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
## code direct copy paste from https://combine-lab.github.io/alevin-tutorial/2020/alevin-velocity/
gtf.file <- "Homo_sapiens.GRCh38.100.gtf.gz"
genome.file <- "Homo_sapiens.GRCh38.dna.primary_assembly.fa"
grl <- eisaR::getFeatureRanges(
gtf = gtf.file,
featureType = c("spliced", "intron"),
intronType = "separate",
flankLength = 90L,
joinOverlappingIntrons = FALSE,
verbose = TRUE
)
grl[4:6]
genome <- Biostrings::readDNAStringSet( genome.file )
names(genome) <- sapply(strsplit(names(genome), " "), .subset, 1)
seqs <- GenomicFeatures::extractTranscriptSeqs(
x = genome,
transcripts = grl
)
Biostrings::writeXStringSet(
seqs, filepath = paste0(gtf.file, ".expanded.fa")
)
eisaR::exportToGtf(
grl,
filepath = paste0(gtf.file, ".expanded.gtf")
)
head(metadata(grl)$corrgene)
write.table(
metadata(grl)$corrgene,
file = paste0(gtf.file, ".expanded.features.tsv"),
row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t"
)
df <- eisaR::getTx2Gene(
grl, filepath = paste0(gtf.file, ".tx2gene.tsv")
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment