##### use Bioconductor annotation packages #######
#
# Walkthrough of converting between human gene identifiers (Entrez IDs,
# gene symbols, RefSeq IDs) with the org.Hs.eg.db annotation package, then
# fetching sequences and ID mappings from Ensembl BioMart via biomaRt.

# Install any missing Bioconductor packages. BiocManager is the supported
# installer; the old `source(".../biocLite.R")` + `biocLite()` route is
# retired and fetched executable code over plain HTTP.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  # An explicit mirror keeps this working in non-interactive sessions.
  install.packages("BiocManager", repos = "https://cloud.r-project.org")
}
bioc_pkgs <- c("org.Hs.eg.db", "GenomicFeatures", "AnnotationDbi", "biomaRt")
missing_pkgs <- bioc_pkgs[
  !vapply(bioc_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  BiocManager::install(missing_pkgs)
}

library("org.Hs.eg.db")
library("AnnotationDbi")
library("GenomicFeatures")

# all the possible mappings
ls("package:org.Hs.eg.db")

# convert Entrez_ids to gene_symbols
# Keys without a mapping come back as NA (ifnotfound = NA) instead of
# raising an error.
myEntrez_ids <- c("1", "10", "100", "1000", "37690")
mySymbols <- mget(myEntrez_ids, org.Hs.egSYMBOL, ifnotfound = NA)
mySymbols
unlist(mySymbols)

# convert gene_symbols to Entrez_ids
mySymbols_2 <- c("VEGFA", "CTCF", "SNAI1", "KDM1A")
myEntrez_ids_2 <- mget(mySymbols_2, org.Hs.egSYMBOL2EG, ifnotfound = NA)
unlist(myEntrez_ids_2)

?AnnotationDbi::mget # get help

# or use the select function
?AnnotationDbi::select

head(keys(org.Hs.eg.db))
keytypes(org.Hs.eg.db)

# select() is namespace-qualified so the call still resolves to the
# AnnotationDbi generic if another attached package masks `select`.
AnnotationDbi::select(
  org.Hs.eg.db,
  keys = mySymbols_2,
  columns = c("SYMBOL", "REFSEQ", "GENENAME", "ENTREZID"),
  keytype = "SYMBOL"
)
AnnotationDbi::select(
  org.Hs.eg.db,
  keys = myEntrez_ids,
  columns = c("SYMBOL", "REFSEQ", "GENENAME", "ENTREZID"),
  keytype = "ENTREZID"
)

# How many gene symbols
symbol <- keys(org.Hs.eg.db, "SYMBOL")
length(symbol)

############### use biomart ###################
library(biomaRt)

# Connect to the Ensembl mart, human gene dataset (requires network access).
mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")

# get sequences
# NOTE: the variable `seq` shadows the name of base::seq; calls such as
# seq(1, 10) still dispatch to the function, but the name is kept only for
# compatibility with code that reads this object.
seq <- getSequence(
  id = "BRCA1",
  type = "hgnc_symbol",
  seqType = "3utr",
  mart = mart
) # pretty slow...
show(seq)

# Flanking sequence for one transcript; `upstream = 30` requests 30 bases
# of upstream flank.
seq2 <- getSequence(
  id = "ENST00000520540",
  type = "ensembl_transcript_id",
  seqType = "gene_flank",
  upstream = 30,
  mart = mart
)
show(seq2)

# convert gene ids: gene symbol to refseq
geneList <- c("VEGFA", "CTCF", "SNAI1", "KDM1A")
results <- getBM(
  attributes = c("refseq_mrna", "hgnc_symbol"),
  filters = "hgnc_symbol",
  values = geneList,
  mart = mart
)
results

?getBM