Created
May 5, 2016 20:50
-
-
Save lcolladotor/c500dd79d49aed1ef33ade5417111453 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library('rtracklayer')

## Try importing bigwig file http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw
## directly from its public (redirecting) URL.
## In the recorded session this call stops with
## "UCSC library operation failed" / "Couldn't open <url>", so traceback()
## is called right after it to capture where the failure happens.
x <- import.bw('http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw',
    as = 'RleList')
traceback()

## Find the effective url using http://stackoverflow.com/a/34383991
#' Resolve a (possibly redirecting) URL to its effective URL
#'
#' Sends a body-less request for `uri` through an RCurl handle configured to
#' follow redirects, then returns the URL the handle reports it ended up at.
#'
#' @param uri A single, non-missing, non-empty character string.
#' @return The `effective.url` element of `RCurl::getCurlInfo()` for the
#'   request.
unshorten_url <- function(uri) {
    ## getCurlInfo() below describes a single transfer, so only one URL is
    ## accepted; reject anything else before touching the network.
    if (!is.character(uri) || length(uri) != 1L || is.na(uri) || !nzchar(uri)) {
        stop("`uri` must be a single non-empty character string",
            call. = FALSE)
    }
    ## Fail loudly if RCurl is missing: require() would only return FALSE and
    ## the error would surface later as "could not find function".
    if (!requireNamespace('RCurl', quietly = TRUE)) {
        stop("Package 'RCurl' is required by unshorten_url()", call. = FALSE)
    }
    opts <- list(
        followlocation = TRUE,  # resolve redirects
        ## NOTE(review): host and peer certificate checks are switched off
        ## here; confirm this is acceptable before reusing the function.
        ssl.verifyhost = FALSE, # suppress certain SSL errors
        ssl.verifypeer = FALSE,
        nobody = TRUE,          # perform HEAD request
        verbose = FALSE
    )
    curlhandle <- RCurl::getCurlHandle(.opts = opts)
    ## The response itself is discarded; the call is made only so the handle
    ## records where the redirects led.
    RCurl::getURL(uri, curl = curlhandle)
    info <- RCurl::getCurlInfo(curlhandle)
    rm(curlhandle) # release the curlhandle!
    ## `[[` instead of `$` so the element name is matched exactly.
    info[['effective.url']]
}
## Resolve the redirect and print the URL it leads to.
url <- unshorten_url('http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw')
url

## Try now with the effective url
## In the recorded session this fails as well, with the warning
## "No openssl available in netConnectHttps".
x <- import.bw(url, as = 'RleList')
traceback()

## url() by default uses 'libcurl', which is what download.file() uses by
## default too and does not work with this url.

## https://github.com/Bioconductor-mirror/rtracklayer/blob/master/R/bigWig.R#L12-L16
## only supports paths, not connections
## So wrap the effective url in a BigWigFile and import from that object
## instead. The recorded session shows the same openssl failure as above.
bw <- BigWigFile(url)
x <- import.bw(bw, as = 'RleList')
traceback()

## Changing from https to http doesn't solve this either
## NOTE(review): this is a hard-coded "templink" address and has presumably
## expired; confirm before relying on it. In the recorded session both
## attempts below time out while connecting.
url_http <- 'http://content-na.drive.amazonaws.com/cdproxy/templink/i_aQAPZJkJ9d9lN1NO5DJJtlbpvAdgbNuc1SkqSTHFouFiZq5'
x <- import.bw(url_http, as = 'RleList')
traceback()

## Same attempt through a BigWigFile object.
bw2 <- BigWigFile(url_http)
x <- import.bw(bw2, as = 'RleList')
traceback()

## Note the original file can be downloaded using method = 'curl'
## ('-L' is passed through to curl as an extra command-line option).
download.file('http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw',
    destfile = 'DRR000897.bw', method = 'curl', extra = '-L')

## While we can now import the data, we want to avoid doing this. Especially
## when using the 'selection' argument.
y <- import.bw('DRR000897.bw', as = 'RleList')
y

## Reproducibility info
## Record when the script ran, widen the console so the package table below
## is not wrapped, and list the session's R and package versions.
message(Sys.time())
options(width = 120)
devtools::session_info()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> library('rtracklayer') | |
Loading required package: GenomicRanges | |
Loading required package: BiocGenerics | |
Loading required package: parallel | |
Attaching package: ‘BiocGenerics’ | |
The following objects are masked from ‘package:parallel’: | |
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, | |
clusterExport, clusterMap, parApply, parCapply, parLapply, | |
parLapplyLB, parRapply, parSapply, parSapplyLB | |
The following objects are masked from ‘package:stats’: | |
IQR, mad, xtabs | |
The following objects are masked from ‘package:base’: | |
anyDuplicated, append, as.data.frame, cbind, colnames, do.call, | |
duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect, | |
is.unsorted, lapply, lengths, Map, mapply, match, mget, order, | |
paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, | |
Reduce, rownames, sapply, setdiff, sort, table, tapply, union, | |
unique, unsplit | |
Loading required package: S4Vectors | |
Loading required package: stats4 | |
Attaching package: ‘S4Vectors’ | |
The following objects are masked from ‘package:base’: | |
colMeans, colSums, expand.grid, rowMeans, rowSums | |
Loading required package: IRanges | |
Loading required package: GenomeInfoDb | |
> | |
> ## Try importing bigwig file http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw | |
> x <- import.bw('http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw', | |
+ as = 'RleList') | |
traceback() | |
Error in seqinfo(ranges) : UCSC library operation failed | |
In addition: Warning message: | |
In seqinfo(ranges) : | |
Couldn't open http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw | |
> traceback() | |
13: .Call(BWGFile_seqlengths, path.expand(path(x))) | |
12: seqinfo(ranges) | |
11: seqinfo(ranges) | |
10: BigWigSelection(which, ...) | |
9: .class1(object) | |
8: as(selection, "BigWigSelection") | |
7: .local(con, format, text, ...) | |
6: import(FileForFormat(con, format), ...) | |
5: import(FileForFormat(con, format), ...) | |
4: import(con, "BigWig", ...) | |
3: import(con, "BigWig", ...) | |
2: import.bw("http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw", | |
as = "RleList") | |
1: import.bw("http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw", | |
as = "RleList") | |
> | |
> ## Find the effective url using http://stackoverflow.com/a/34383991 | |
> unshorten_url <- function(uri) { | |
+ require('RCurl') | |
+ opts <- list( | |
+ followlocation = TRUE, # resolve redirects | |
+ ssl.verifyhost = FALSE, # suppress certain SSL errors | |
+ ssl.verifypeer = FALSE, | |
+ nobody = TRUE, # perform HEAD request | |
+ verbose = FALSE | |
+ ) | |
+ curlhandle <- getCurlHandle(.opts = opts) | |
+ getURL(uri, curl = curlhandle) | |
+ info <- getCurlInfo(curlhandle) | |
+ rm(curlhandle) # release the curlhandle! | |
+ info$effective.url | |
+ } | |
> url <- unshorten_url('http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw') | |
Loading required package: RCurl | |
Loading required package: bitops | |
> url | |
[1] "https://content-na.drive.amazonaws.com/cdproxy/templink/usTQCr2pAaI3tTps4AFQuz1H9kmm23EDYy39SQ3ke5EuFiZq5" | |
> | |
> ## Try now with the effective url | |
> x <- import.bw(url, as = 'RleList') | |
Error in seqinfo(ranges) : UCSC library operation failed | |
In addition: Warning message: | |
In seqinfo(ranges) : | |
No openssl available in netConnectHttps for content-na.drive.amazonaws.com : 443 | |
> traceback() | |
13: .Call(BWGFile_seqlengths, path.expand(path(x))) | |
12: seqinfo(ranges) | |
11: seqinfo(ranges) | |
10: BigWigSelection(which, ...) | |
9: .class1(object) | |
8: as(selection, "BigWigSelection") | |
7: .local(con, format, text, ...) | |
6: import(FileForFormat(con, format), ...) | |
5: import(FileForFormat(con, format), ...) | |
4: import(con, "BigWig", ...) | |
3: import(con, "BigWig", ...) | |
2: import.bw(url, as = "RleList") | |
1: import.bw(url, as = "RleList") | |
> | |
> ## url() by default uses 'libcurl', which is what download.file() uses by | |
> ## default too and does not work with this url. | |
> | |
> ## https://github.com/Bioconductor-mirror/rtracklayer/blob/master/R/bigWig.R#L12-L16 | |
> ## only supports paths, not connections | |
> bw <- BigWigFile(url) | |
> x <- import.bw(bw, as = 'RleList') | |
Error in seqinfo(ranges) : UCSC library operation failed | |
In addition: Warning message: | |
In seqinfo(ranges) : | |
No openssl available in netConnectHttps for content-na.drive.amazonaws.com : 443 | |
> traceback() | |
11: .Call(BWGFile_seqlengths, path.expand(path(x))) | |
10: seqinfo(ranges) | |
9: seqinfo(ranges) | |
8: BigWigSelection(which, ...) | |
7: .class1(object) | |
6: as(selection, "BigWigSelection") | |
5: .local(con, format, text, ...) | |
4: import(con, "BigWig", ...) | |
3: import(con, "BigWig", ...) | |
2: import.bw(bw, as = "RleList") | |
1: import.bw(bw, as = "RleList") | |
> | |
> ## Changing from https to http doesn't solve this either | |
> url_http <- 'http://content-na.drive.amazonaws.com/cdproxy/templink/i_aQAPZJkJ9d9lN1NO5DJJtlbpvAdgbNuc1SkqSTHFouFiZq5' | |
> x <- import.bw(url_http, as = 'RleList') | |
Error in seqinfo(ranges) : UCSC library operation failed | |
In addition: Warning messages: | |
1: In seqinfo(ranges) : | |
TCP non-blocking connect() to content-na.drive.amazonaws.com timed-out in select() after 10000 milliseconds - Cancelling! | |
2: In seqinfo(ranges) : | |
Couldn't open http://content-na.drive.amazonaws.com/cdproxy/templink/i_aQAPZJkJ9d9lN1NO5DJJtlbpvAdgbNuc1SkqSTHFouFiZq5 | |
> traceback() | |
13: .Call(BWGFile_seqlengths, path.expand(path(x))) | |
12: seqinfo(ranges) | |
11: seqinfo(ranges) | |
10: BigWigSelection(which, ...) | |
9: .class1(object) | |
8: as(selection, "BigWigSelection") | |
7: .local(con, format, text, ...) | |
6: import(FileForFormat(con, format), ...) | |
5: import(FileForFormat(con, format), ...) | |
4: import(con, "BigWig", ...) | |
3: import(con, "BigWig", ...) | |
2: import.bw(url_http, as = "RleList") | |
1: import.bw(url_http, as = "RleList") | |
> | |
> bw2 <- BigWigFile(url_http) | |
> x <- import.bw(bw2, as = 'RleList') | |
Error in seqinfo(ranges) : UCSC library operation failed | |
In addition: Warning messages: | |
1: In seqinfo(ranges) : | |
TCP non-blocking connect() to content-na.drive.amazonaws.com timed-out in select() after 10000 milliseconds - Cancelling! | |
2: In seqinfo(ranges) : | |
Couldn't open http://content-na.drive.amazonaws.com/cdproxy/templink/i_aQAPZJkJ9d9lN1NO5DJJtlbpvAdgbNuc1SkqSTHFouFiZq5 | |
> traceback() | |
11: .Call(BWGFile_seqlengths, path.expand(path(x))) | |
10: seqinfo(ranges) | |
9: seqinfo(ranges) | |
8: BigWigSelection(which, ...) | |
7: .class1(object) | |
6: as(selection, "BigWigSelection") | |
5: .local(con, format, text, ...) | |
4: import(con, "BigWig", ...) | |
3: import(con, "BigWig", ...) | |
2: import.bw(bw2, as = "RleList") | |
1: import.bw(bw2, as = "RleList") | |
> | |
> ## Note the original file can be downloaded using method = 'curl' | |
> download.file('http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw', | |
+ destfile = 'DRR000897.bw', method = 'curl', extra = '-L') | |
% Total % Received % Xferd Average Speed Time Time Time Current | |
Dload Upload Total Spent Left Speed | |
100 417 100 417 0 0 122 0 0:00:03 0:00:03 --:--:-- 122 | |
100 50.0M 100 50.0M 0 0 5205k 0 0:00:09 0:00:09 --:--:-- 9363k | |
> | |
> ## While we can now import the data, we want to avoid doing this. Specially when | |
> ## using the 'selection' argument. | |
> y <- import.bw('DRR000897.bw', as = 'RleList') | |
> y | |
RleList of length 94 | |
$chr1 | |
numeric-Rle of length 248956422 with 1249470 runs | |
Lengths: 13378 37 366 37 25 ... 535 37 1028 37 13595 | |
Values : 0 1 0 1 0 ... 0 1 0 1 0 | |
$chr10 | |
numeric-Rle of length 133797422 with 412887 runs | |
Lengths: 47132 37 12 37 333 ... 211 37 2082 37 11003 | |
Values : 0 1 0 1 0 ... 0 1 0 1 0 | |
$chr11 | |
numeric-Rle of length 135086622 with 700940 runs | |
Lengths: 74243 37 22178 37 22907 ... 37 4395 37 17340 | |
Values : 0 1 0 1 0 ... 1 0 1 0 | |
$chr11_KI270721v1_random | |
numeric-Rle of length 100316 with 363 runs | |
Lengths: 2593 37 17 7 1 ... 876 37 492 37 47993 | |
Values : 0 1 0 1 2 ... 0 1 0 1 0 | |
$chr12 | |
numeric-Rle of length 133275309 with 628862 runs | |
Lengths: 10055 37 4101 37 316 ... 623 37 4860 37 64715 | |
Values : 0 1 0 1 0 ... 0 1 0 1 0 | |
... | |
<89 more elements> | |
> | |
> | |
> ## Reproducibility info | |
> message(Sys.time()) | |
2016-05-05 16:48:30 | |
> options(width = 120) | |
> devtools::session_info() | |
Session info ----------------------------------------------------------------------------------------------------------- | |
setting value | |
version R version 3.3.0 RC (2016-05-01 r70572) | |
system x86_64, darwin13.4.0 | |
ui X11 | |
language (EN) | |
collate en_US.UTF-8 | |
tz America/New_York | |
date 2016-05-05 | |
Packages --------------------------------------------------------------------------------------------------------------- | |
package * version date source | |
Biobase 2.32.0 2016-05-04 Bioconductor | |
BiocGenerics * 0.18.0 2016-05-04 Bioconductor | |
BiocParallel 1.6.0 2016-05-04 Bioconductor | |
Biostrings 2.40.0 2016-05-04 Bioconductor | |
bitops * 1.0-6 2013-08-17 CRAN (R 3.3.0) | |
colorout * 1.1-2 2016-05-05 Github (jalvesaq/colorout@6538970) | |
devtools 1.11.1 2016-04-21 CRAN (R 3.3.0) | |
digest 0.6.9 2016-01-08 CRAN (R 3.3.0) | |
GenomeInfoDb * 1.8.0 2016-05-04 Bioconductor | |
GenomicAlignments 1.8.0 2016-05-04 Bioconductor | |
GenomicRanges * 1.24.0 2016-05-04 Bioconductor | |
IRanges * 2.6.0 2016-05-04 Bioconductor | |
memoise 1.0.0 2016-01-29 CRAN (R 3.3.0) | |
RCurl * 1.95-4.8 2016-03-01 CRAN (R 3.3.0) | |
Rsamtools 1.24.0 2016-05-04 Bioconductor | |
rtracklayer * 1.32.0 2016-05-04 Bioconductor | |
S4Vectors * 0.10.0 2016-05-04 Bioconductor | |
SummarizedExperiment 1.2.0 2016-05-04 Bioconductor | |
withr 1.0.1 2016-02-04 CRAN (R 3.3.0) | |
XML 3.98-1.4 2016-03-01 CRAN (R 3.3.0) | |
XVector 0.12.0 2016-05-04 Bioconductor | |
zlibbioc 1.18.0 2016-05-04 Bioconductor | |
> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment