Created
October 2, 2017 17:12
-
-
Save b-klaus/f2a502109f852702bf7efc5ad5c0bd11 to your computer and use it in GitHub Desktop.
fixed getAE function for the Bioc ArrayExpress that allows the usage of https
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# We have to change to https in `getAE` | |
# to https as EBI services have switched the protocol on Oct 1st | |
# see: https://www.ebi.ac.uk/services | |
# The package XML does not support https though, so we need RCurl::getURL: | |
# https://stackoverflow.com/questions/23584514/error-xml-content-does-not-seem-to-be-xml-r-3-1-0 | |
https_getAE <- function (accession, path = getwd(), type = "full", extract = TRUE, | |
local = FALSE, sourcedir = path) | |
{ | |
if (!local) { | |
baseURL = "https://www.ebi.ac.uk/arrayexpress/xml/v2/files" | |
xmlURL = getURL(paste(baseURL, accession, sep = "/")) | |
xml = xmlTreeParse(xmlURL, useInternalNodes = TRUE, isURL=FALSE) | |
sdrfURL = xpathSApply(xml, "/files/experiment/file[kind='sdrf' and extension='txt']/url", | |
xmlValue) | |
sdrfFile = xpathSApply(xml, "/files/experiment/file[kind='sdrf' and extension='txt']/name", | |
xmlValue) | |
idfURL = xpathSApply(xml, "/files/experiment/file[kind='idf' and extension='txt']/url", | |
xmlValue) | |
idfFile = xpathSApply(xml, "/files/experiment/file[kind='idf' and extension='txt']/name", | |
xmlValue) | |
adfURL = xpathApply(xml, "/files/experiment/file[kind='adf' and extension='txt']/url", | |
xmlValue) | |
adfFiles = xpathApply(xml, "/files/experiment/file[kind='adf' and extension='txt']/name", | |
xmlValue) | |
rawArchiveURL = xpathApply(xml, "/files/experiment/file[kind='raw' and extension='zip']/url", | |
xmlValue) | |
procArchiveURL = xpathApply(xml, "/files/experiment/file[kind='processed' and extension='zip']/url", | |
xmlValue) | |
} | |
else { | |
allfiles = list.files(sourcedir) | |
sdrfFile = allfiles[grep(paste(accession, ".sdrf.txt$", | |
sep = ""), allfiles)] | |
if (length(sdrfFile) == 0) | |
stop("SDRF file not found in directory ", sourcedir) | |
sdrfURL = paste("file:/", sourcedir, sdrfFile, sep = "/") | |
idfFile = allfiles[grep(paste(accession, ".idf.txt$", | |
sep = ""), allfiles)] | |
if (length(idfFile) == 0) | |
warning("IDF file not found in directory ", sourcedir) | |
idfURL = paste("file:/", sourcedir, idfFile, sep = "/") | |
ph = try(read.AnnotatedDataFrame(sdrfFile, path = sourcedir, | |
row.names = NULL, blank.lines.skip = TRUE, fill = TRUE, | |
varMetadata.char = "$")) | |
if (inherits(ph, "try-error")) { | |
warning("Unable to retrieve ADF reference from SDRF. Reading any ADF in directory.") | |
adfFiles = allfiles[grep(".adf.txt$", allfiles)] | |
} | |
else { | |
adr = unique(pData(ph)[, getSDRFcolumn("ArrayDesignREF", | |
varLabels(ph))]) | |
adfFiles = paste(adr, ".adf.txt", sep = "") | |
} | |
if (all(file.exists(file.path(sourcedir, adfFiles)))) { | |
adfURL = paste("file:/", sourcedir, adfFiles, sep = "/") | |
downloadADF = FALSE | |
} | |
else { | |
filesURL = "https://www.ebi.ac.uk/arrayexpress/files" | |
adfURL = paste(filesURL, adr, adfFiles, sep = "/") | |
downloadADF = TRUE | |
} | |
rawArchiveURL = NULL | |
procArchiveURL = NULL | |
rawArchive = allfiles[grep(paste(accession, ".raw.[0-9]{1,}.zip", | |
sep = ""), allfiles)] | |
if (length(rawArchive) != 0) | |
rawArchiveURL = paste("file:/", sourcedir, rawArchive, | |
sep = "/") | |
else warning("No raw files found in directory ", sourcedir) | |
processedArchive = allfiles[grep(paste(accession, ".processed.[0-9]{1,}.zip", | |
sep = ""), allfiles)] | |
if (length(processedArchive) != 0) | |
procArchiveURL = paste("file:/", sourcedir, processedArchive, | |
sep = "/") | |
else warning("No processed data files found in directory ", | |
sourcedir) | |
} | |
if (length(sdrfURL) > 1) { | |
warning("Found two SDRF files: \n", paste(sdrfURL, "\n")) | |
hybSDRF = grep("hyb.sdrf", sdrfURL) | |
if (length(hybSDRF) > 0) { | |
message("Choosing ", sdrfURL[hybSDRF]) | |
sdrfURL = sdrfURL[hybSDRF] | |
sdrfFile = sdrfFile[hybSDRF] | |
} | |
else { | |
warning("Unable to choose SDRF file. Please report experiment to miamexpress@ebi.ac.uk") | |
} | |
} | |
if (!local || path != sourcedir || downloadADF) { | |
adfFiles <- lapply(adfURL, function(url) { | |
filedest = paste(path, basename(url), sep = "/") | |
dnld = try(download.file(url, filedest, mode = "wb")) | |
if (inherits(dnld, "try-error") || file.info(filedest)$size == | |
0) { | |
warning(paste(url, " does not exist or is empty. \n"), | |
sep = "") | |
adffile = NULL | |
} | |
else { | |
adffile = basename(filedest) | |
} | |
return(adffile) | |
}) | |
if (!is.null(adfFiles)) | |
adfFiles = unlist(adfFiles) | |
} | |
if (!local || path != sourcedir) { | |
sdrfFileDest = paste(path, sdrfFile, sep = "/") | |
dnld = try(download.file(sdrfURL, sdrfFileDest, mode = "wb")) | |
if (inherits(dnld, "try-error") || file.info(sdrfFileDest)$size == | |
0) { | |
warning(paste(sdrfFile, " does not exist or is empty. The object will not have featureData or phenoData. \n"), | |
sep = "") | |
sdrfFile = NULL | |
adffile = NULL | |
} | |
idfFileDest = paste(path, idfFile, sep = "/") | |
dnld = try(download.file(idfURL, idfFileDest, mode = "wb")) | |
if (inherits(dnld, "try-error") || file.info(idfFileDest)$size == | |
0) { | |
warning(paste(idfFile, " does not exist or is empty. \n"), | |
sep = "") | |
idfFile = NULL | |
} | |
rawArchive = NULL | |
processedArchive = NULL | |
if (type != "mageFilesOnly" && !is.null(rawArchiveURL) && | |
(type == "full" || type == "raw")) { | |
message("Copying raw data files\n") | |
rawArchive <- lapply(rawArchiveURL, function(url) { | |
filedest = paste(path, basename(url), sep = "/") | |
dnld = try(download.file(url, filedest, mode = "wb")) | |
if (inherits(dnld, "try-error") || file.info(filedest)$size == | |
0) { | |
warning(paste(url, " does not exist or is empty. \n"), | |
sep = "") | |
} | |
else { | |
return(filedest) | |
} | |
}) | |
if (!is.null(rawArchive)) { | |
rawArchive = unlist(rawArchive) | |
rawArchive = basename(rawArchive) | |
} | |
} | |
if ((type != "mageFilesOnly" && type == "full" || type == | |
"processed") && !is.null(procArchiveURL)) { | |
message("Copying processed data files\n") | |
processedArchive <- lapply(procArchiveURL, function(url) { | |
filedest = paste(path, basename(url), sep = "/") | |
dnld = try(download.file(url, filedest, mode = "wb")) | |
if (inherits(dnld, "try-error") || file.info(filedest)$size == | |
0) { | |
warning(paste(url, " does not exist or is empty. \n"), | |
sep = "") | |
} | |
else { | |
return(filedest) | |
} | |
}) | |
if (!is.null(processedArchive)) { | |
processedArchive = unlist(processedArchive) | |
processedArchive = basename(processedArchive) | |
} | |
} | |
} | |
rawFiles = NULL | |
processedFiles = NULL | |
if (extract) { | |
message("Unpacking data files") | |
if (!is.null(rawArchive)) | |
rawFiles <- lapply(rawArchive, function(zipfile) { | |
rawfiles = extract.zip(file = paste(path, zipfile, | |
sep = "/")) | |
return(rawfiles) | |
}) | |
if (!is.null(processedArchive)) | |
processedFiles <- lapply(processedArchive, function(zipfile) { | |
procfiles = extract.zip(file = paste(path, zipfile, | |
sep = "/")) | |
return(procfiles) | |
}) | |
if (!is.null(rawFiles)) | |
rawFiles = unlist(rawFiles) | |
if (!is.null(processedFiles)) | |
processedFiles = unlist(processedFiles) | |
} | |
res = list(path = path, rawFiles = rawFiles, rawArchive = rawArchive, | |
processedFiles = processedFiles, processedArchive = processedArchive, | |
sdrf = sdrfFile, idf = idfFile, adf = adfFiles) | |
return(res) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment