Skip to content

Instantly share code, notes, and snippets.

@mtmorgan
Created April 7, 2016 09:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mtmorgan/ea10d0d424bf7e414d8e064d903f026d to your computer and use it in GitHub Desktop.
Save mtmorgan/ea10d0d424bf7e414d8e064d903f026d to your computer and use it in GitHub Desktop.
Query DisGeNET disease / gene database from R
#' Query DisGeNET disease / gene database
#'
#' Based on a script by jpinero@imim.es, retrieved from
#' http://www.disgenet.org/ds/DisGeNET/scripts/disgenet.R on 7 April,
#' 2016. This version is meant for interactive use within an R
#' session, and makes a single query to DisGeNET rather than one query
#' for each input symbol.
#'
#' @param input character vector of gene or disease identifiers. Must
#'     be non-empty, contain no `NA`, and contain no single quotes
#'     (each value is embedded in the query as a single-quoted literal).
#'
#' @param entity character(1) specifying that the identifiers are
#'     either "gene" or "disease".
#'
#' @param identifier character(1) specifying the type of
#'     identifier. If 'entity' is "gene", then 'identifier' can be
#'     "entrez", or "hgnc". If 'entity' is "disease", 'identifier' can
#'     be 'cui', 'mesh', or 'omim'.
#'
#' @return A data.frame read from the tab-delimited response body. A
#'     warning lists any elements of `input` that do not appear in the
#'     result column corresponding to the queried field.
#'
#' @examples
#' input <- c("CDK1", "CDK1A", "CDK2")
#' result <- DisGeNET(input, 'gene', 'hgnc')
#' head(result)
#'
DisGeNET <- function(input, entity = c("gene", "disease"), identifier) {
  # Validate everything up front so a bad call fails before any
  # package loading or network traffic.
  stopifnot(is.character(input), length(input) > 0L, !anyNA(input))
  entity <- match.arg(entity)
  stopifnot(is.character(identifier), length(identifier) == 1L)
  field <- .disgenet_field(entity, identifier)
  # Values are pasted into the query inside single quotes; an embedded
  # quote would terminate the literal early and corrupt the query.
  if (any(grepl("'", input, fixed = TRUE))) {
    stop("'input' must not contain single quotes", call. = FALSE)
  }
  oql <- .disgenet_query(field, input)

  loadNamespace("httr")
  response <- httr::POST("http://www.disgenet.org/oql", body = oql)
  httr::stop_for_status(response)
  # Request the body as text explicitly: without `as`, content() picks
  # a parser from the response's MIME type and may not return a string.
  body <- httr::content(response, as = "text", encoding = "UTF-8")
  tbl <- utils::read.csv(text = body, header = TRUE, sep = "\t")

  # Compare against the column that was actually queried, not always
  # the gene-symbol column. as.character() keeps the comparison textual
  # when read.csv() has parsed the column as numeric or factor.
  bad <- !input %in% as.character(tbl[[field]])
  if (any(bad)) {
    warning(
      "entities not in DisGeNET:\n ",
      paste(sQuote(input[bad]), collapse = ", "),
      call. = FALSE
    )
  }
  tbl
}

# Map an entity / identifier pair onto the qualified query field name,
# signalling an error for identifier types the entity does not support.
.disgenet_field <- function(entity, identifier) {
  switch(
    entity,
    gene = {
      if (!identifier %in% c("entrez", "hgnc")) {
        stop(
          "entity='gene' 'identifier' must be 'entrez' or 'hgnc'",
          call. = FALSE
        )
      }
      if (identifier == "entrez") "c2.geneId" else "c2.name"
    },
    disease = {
      if (!identifier %in% c("cui", "mesh", "omim")) {
        stop(
          "entity='disease' 'identifier' must be 'cui', 'mesh' or 'omim'",
          call. = FALSE
        )
      }
      # Field names mirror the c1 fields listed in the SELECT clause
      # (cui, MESH, omimInt).
      # NOTE(review): assumes WHERE needs the same spelling as SELECT;
      # confirm against the DisGeNET schema.
      switch(identifier, cui = "c1.cui", mesh = "c1.MESH", omim = "c1.omimInt")
    }
  )
}

# Build the query text that restricts `field` to the values in `input`
# and orders the result by `field`, then by descending score.
.disgenet_query <- function(field, input) {
  terms <- paste(sprintf("'%s'", input), collapse = ", ")
  paste(
    "DEFINE",
    "c0='/data/gene_disease_score_onexus',",
    "c1='/data/diseases',",
    "c2='/data/genes',",
    "c3='/data/sources'",
    "ON",
    "'http://bitbucket.org/janis_pi/disgenet_onexus.git'",
    "SELECT",
    "c1 (cui, name, diseaseClassName, STY, MESH, omimInt),",
    "c2 (geneId, name, uniprotId, description, pathName, pantherName),",
    "c0 (score, pmids)",
    "FROM",
    "c0",
    "WHERE",
    # The group opened before `c3` is closed after the IN list, so the
    # parentheses balance.
    paste0("(c3 = 'ALL' AND ", field, " IN (", terms, "))"),
    paste0("ORDER BY ", field, ", c0.score DESC"),
    sep = "\n"
  )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment