Skip to content

Instantly share code, notes, and snippets.

@sp00nman
Created January 15, 2016 17:36
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save sp00nman/f8c6fd6d748f90eaf697 to your computer and use it in GitHub Desktop.
Save sp00nman/f8c6fd6d748f90eaf697 to your computer and use it in GitHub Desktop.
Convert cufflinks gene ids to ensembl ids
#! /usr/bin/env Rscript
suppressPackageStartupMessages(expr = library(package = "rtracklayer"))
# Reference annotation (file1.gtf): distinct pairs of gene_id and
# transcript_id, stored under Ensembl column names. The imported ranges
# object only lives inside local(), so nothing has to be removed afterwards.
reference_frame <- local({
  gtf <- import(con = "file1.gtf")
  id_pairs <- data.frame(
    ensembl_gene_id = gtf$gene_id,
    ensembl_transcript_id = gtf$transcript_id,
    stringsAsFactors = FALSE
  )
  unique(x = id_pairs)
})

# Assembled annotation (merge.gtf): distinct combinations of the assembly's
# own gene/transcript ids, the gene name, and the nearest_ref attribute.
# nearest_ref is stored as ensembl_transcript_id because it is the join key
# against the reference table below.
assembled_frame <- local({
  gtf <- import(con = "merge.gtf")
  id_rows <- data.frame(
    gene_id = gtf$gene_id,
    transcript_id = gtf$transcript_id,
    gene_name = gtf$gene_name,
    ensembl_transcript_id = gtf$nearest_ref,
    stringsAsFactors = FALSE
  )
  unique(x = id_rows)
})

# Keep only rows whose transcript id occurs in both tables.
merged_frame <- merge(
  reference_frame,
  assembled_frame,
  by = "ensembl_transcript_id"
)

# Spot check: show the merged rows of a single locus (auto-printed by Rscript).
merged_frame[merged_frame[["gene_id"]] == "XLOC_000005", ]
###
# Collapse merged_frame to one row per assembled gene_id. For each gene the
# distinct Ensembl gene ids and the distinct Ensembl transcript ids are
# joined into comma-separated strings, in order of first appearance.
#
# The rows are grouped once with split() instead of rescanning merged_frame
# for every gene id, and an empty merged_frame yields a zero-row result
# rather than an error.
uniq_gene_id <- unique(merged_frame$gene_id)
# A missing gene_id cannot be matched to anything later on, and comparing
# against NA would drag all-NA rows into the groups, so it is dropped here.
uniq_gene_id <- uniq_gene_id[!is.na(uniq_gene_id)]

# Levels follow uniq_gene_id so that split() returns the groups in exactly
# the order of the gene ids they are paired with below.
gene_groups <- factor(merged_frame$gene_id, levels = uniq_gene_id)

# Distinct values of one group as a single comma-separated string.
collapse_unique <- function(ids) {
  paste(unique(ids), collapse = ",")
}

ensg_ids <- vapply(
  split(merged_frame$ensembl_gene_id, gene_groups),
  collapse_unique,
  character(1),
  USE.NAMES = FALSE
)
enst_ids <- vapply(
  split(merged_frame$ensembl_transcript_id, gene_groups),
  collapse_unique,
  character(1),
  USE.NAMES = FALSE
)

# stringsAsFactors = FALSE matches how the other data frames in this script
# are built and keeps the id columns as plain character vectors.
fill_dtaframe <- data.frame(
  uniq_gene_id = uniq_gene_id,
  ensembl_gene_id = ensg_ids,
  ensembl_transcript_id = enst_ids,
  stringsAsFactors = FALSE
)
# Table to annotate: tab-separated with a header row. It is joined on its
# gene_id column below.
dta <- read.table(
  file = "table_for_conversion.tsv",
  sep = "\t",
  header = TRUE
)

# Attach the collapsed Ensembl ids. merge() keeps only matching rows by
# default, so rows of dta whose gene_id is absent from fill_dtaframe are
# dropped from the result.
merged_dta <- merge(
  x = dta,
  y = fill_dtaframe,
  by.x = "gene_id",
  by.y = "uniq_gene_id"
)

# Write the annotated table as unquoted TSV.
# NOTE(review): row.names is left at its default, so row names are written as
# a leading column without a header entry -- confirm that consumers of
# converted_table.tsv expect this before changing it.
write.table(
  merged_dta,
  file = "converted_table.tsv",
  quote = FALSE,
  sep = "\t"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment