Skip to content

Instantly share code, notes, and snippets.

@Dudemanguy
Created October 18, 2017 16:36
Show Gist options
  • Save Dudemanguy/675568d6b8270fbc6eef094eb1a897e0 to your computer and use it in GitHub Desktop.
Save Dudemanguy/675568d6b8270fbc6eef094eb1a897e0 to your computer and use it in GitHub Desktop.
Script for quickly searching and fetching terms from GO database
suppressPackageStartupMessages(library(GO.db))
suppressPackageStartupMessages(library(biomaRt))
#script for quickly searching and fetching terms from GO database
query <- readline("Input your search query in regex. \n")
zz <- Ontology(GOTERM)
goterms <- unlist(Term(GOTERM))
whmf <- grep(query, ignore.case=TRUE, goterms)
df <- data.frame(goterms[whmf])
goterm_list <- rownames(df)
#get child terms
a <- as.list(GOBPOFFSPRING)
b <- as.list(GOCCOFFSPRING)
c <- as.list(GOMFOFFSPRING)
a <- a[goterm_list]
b <- b[goterm_list]
c <- c[goterm_list]
combined <- c(unlist(a), unlist(b), unlist(c), goterm_list)
unique_terms <- unique(combined)
#convert to refseq
ensembl <- useMart("ensembl")
ensembl <- useDataset("hsapiens_gene_ensembl", mart=ensembl)
mrna <- getBM(attributes=c("go_id", "refseq_mrna"), filters="go", values=c(unique_terms), mart=ensembl)
ncrna <- getBM(attributes=c("go_id", "refseq_ncrna"), filters="go", values=c(unique_terms), mart=ensembl)
mrna_char <- mrna[,2]
ncrna_char <- ncrna[,2]
transcript_id <- unique(c(mrna_char, ncrna_char))
#import expressed transcript table and intersect with query results
x <- read.delim("input_table.txt")
y <- x[x$transcript_ID %in% transcript_id,]
rownames(y) <- y[,1]
y <- y[-(1)]
write.table(y, file="expressed_genes.txt", sep="\t", quote=FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment