Skip to content

Instantly share code, notes, and snippets.

@anamariaelek
Created March 26, 2022 19:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anamariaelek/6d78368b1ae572d137487fc1e013c951 to your computer and use it in GitHub Desktop.
Save anamariaelek/6d78368b1ae572d137487fc1e013c951 to your computer and use it in GitHub Desktop.
require(data.table)
require(stringr)
# GTF TO BED
# Convert the records of one GTF feature type into a 6-column BED-like table.
#
# Arguments:
#   gtf      Path to a GTF file (read with fread()) or a data.frame/data.table
#            holding the GTF columns in file order. A table passed in is copied
#            first, so the caller's object is left untouched.
#   feature  Value of the 3rd GTF column to keep. It is also the attribute
#            prefix: the record name is taken from `<feature>_id "..."`.
#   bed_file Optional output path. When not NULL the result is also written as
#            a tab-separated file without header or quoting.
#
# Returns:
#   A data.table with columns V1 (sequence), V4 (start, shifted to 0-based),
#   V5 (end), V10 (extracted id, NA when the attribute is absent), V6 and V7
#   (6th and 7th GTF columns, copied unchanged).
gtf_to_bed <- function(gtf, feature = "transcript", bed_file = NULL) {
  if (is.character(gtf)) {
    gtf <- fread(gtf)
  } else if (inherits(gtf, "data.frame")) {
    # setDT() and setnames() work by reference; without the copy the caller's
    # table would be converted and have its columns renamed.
    gtf <- setDT(copy(gtf))
  } else {
    stop("`gtf` must be a file path or a data.frame", call. = FALSE)
  }
  if (ncol(gtf) < 9L) {
    stop("`gtf` must have at least 9 columns", call. = FALSE)
  }
  setnames(gtf, paste0("V", seq_len(ncol(gtf))))

  # Look-behind keeps only the quoted value that follows `<feature>_id "`.
  id_pattern <- sprintf('(?<=%s_id ")[^"]+', feature)
  bed <- gtf[V3 == feature]
  bed[, V10 := str_extract(V9, id_pattern)]

  # GTF starts are 1-based and inclusive, BED starts are 0-based; the end
  # coordinate is numerically the same in both formats.
  bed <- bed[, .(V1, V4 = V4 - 1L, V5, V10, V6, V7)]

  if (!is.null(bed_file)) {
    fwrite(bed, bed_file, sep = "\t", col.names = FALSE, quote = FALSE)
  }
  return(bed)
}
# GTF TO GFF
# Convert GTF records of the requested feature types into a 9-column GFF table.
#
# Arguments:
#   gtf      Path to a GTF file (read with fread()) or a data.frame/data.table
#            holding the GTF columns in file order. A table passed in is copied
#            first, so the caller's object is left untouched.
#   feature  Feature types (3rd GTF column) to keep, ordered parent first.
#            Rows of the first type get `ID=<value of <type>_id>`; rows of every
#            later type get `Parent=<value of <previous type>_id>`.
#   gff_file Optional output path. When not NULL the result is also written as
#            a tab-separated file without header or quoting.
#
# Returns:
#   A data.table with the first eight GTF columns plus V10, the rewritten
#   attribute column. V10 is "." for rows whose id could not be extracted.
gtf_to_gff <- function(gtf, feature = c("transcript", "exon"), gff_file = NULL) {
  if (is.character(gtf)) {
    gtf <- fread(gtf)
  } else if (inherits(gtf, "data.frame")) {
    # setDT() and setnames() work by reference; without the copy the caller's
    # table would be converted and have its columns renamed.
    gtf <- setDT(copy(gtf))
  } else {
    stop("`gtf` must be a file path or a data.frame", call. = FALSE)
  }
  if (ncol(gtf) < 9L) {
    stop("`gtf` must have at least 9 columns", call. = FALSE)
  }
  setnames(gtf, paste0("V", seq_len(ncol(gtf))))

  gff <- gtf[V3 %in% feature]
  # Always create the attribute column so the output keeps nine columns even
  # when no id can be extracted; "." is the GFF placeholder for an empty field.
  gff[, V10 := "."]

  for (i in seq_along(feature)) {
    ft <- feature[i]
    rows <- which(gff$V3 == ft)
    if (length(rows) == 0L) {
      next
    }
    # First feature type is the top of the hierarchy and carries its own id;
    # each later type points at the id of the type listed just before it.
    if (i == 1L) {
      key <- "ID"
      id_source <- ft
    } else {
      key <- "Parent"
      id_source <- feature[i - 1L]
    }
    ids <- str_extract(gff$V9[rows], sprintf('(?<=%s_id ")[^"]+', id_source))
    attrs <- paste0(key, "=", ids)
    # Avoid emitting the literal strings "ID=NA" / "Parent=NA".
    attrs[is.na(ids)] <- "."
    set(gff, i = rows, j = "V10", value = attrs)
  }

  # Drop the original GTF attributes; V10 takes their place as the 9th column.
  gff[, V9 := NULL]
  if (!is.null(gff_file)) {
    fwrite(gff, gff_file, sep = "\t", col.names = FALSE, quote = FALSE)
  }
  return(gff)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment