Skip to content

Instantly share code, notes, and snippets.

@lcolladotor
Created May 5, 2016 20:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lcolladotor/c500dd79d49aed1ef33ade5417111453 to your computer and use it in GitHub Desktop.
Save lcolladotor/c500dd79d49aed1ef33ade5417111453 to your computer and use it in GitHub Desktop.
## Reproducible example: importing a remote BigWig file with rtracklayer.
library('rtracklayer')
## Attempt 1: import the BigWig file directly from its public URL,
## http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw
x <- import.bw('http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw',
as = 'RleList')
## Print the call stack of the most recent uncaught error.
traceback()
## Find the effective URL (the one reached after following redirects) using
## the approach from http://stackoverflow.com/a/34383991
## Resolve the effective (final) URL behind a redirecting link.
##
## Sends a body-less request through RCurl, following redirects, and returns
## the 'effective.url' field that getCurlInfo() reports for the handle.
##
## Arguments:
##   uri  A single, non-empty character string with the URL to resolve.
##
## Returns:
##   The 'effective.url' element of RCurl::getCurlInfo() for the request.
##
## Errors:
##   Stops if 'uri' is not a single non-empty string, or if the RCurl
##   package is not installed. Errors raised by RCurl itself propagate.
unshorten_url <- function(uri) {
    ## Validate up front so bad input fails with a clear message instead of
    ## an empty or misleading result from the request below.
    if (!is.character(uri) || length(uri) != 1L || is.na(uri) ||
        !nzchar(uri)) {
        stop("'uri' must be a single non-empty character string",
            call. = FALSE)
    }
    ## require() only returns FALSE when the package is missing, which used
    ## to surface later as "could not find function". Fail early instead,
    ## and call RCurl through its namespace so the search path is untouched.
    if (!requireNamespace('RCurl', quietly = TRUE)) {
        stop("Package 'RCurl' is required by unshorten_url()", call. = FALSE)
    }
    opts <- list(
        followlocation = TRUE, # resolve redirects
        ## NOTE(review): certificate and host verification are disabled, so
        ## the resolved URL should not be trusted for sensitive use.
        ssl.verifyhost = FALSE, # suppress certain SSL errors
        ssl.verifypeer = FALSE,
        nobody = TRUE, # perform HEAD request
        verbose = FALSE
    )
    curlhandle <- RCurl::getCurlHandle(.opts = opts)
    ## The response itself is not needed; the call only populates the
    ## handle's transfer information.
    RCurl::getURL(uri, curl = curlhandle)
    ## 'curlhandle' is local and goes out of scope on return, so no explicit
    ## rm() is needed.
    RCurl::getCurlInfo(curlhandle)$effective.url
}
## Resolve the URL that the duffel.rail.bio link is redirected to and print it.
url <- unshorten_url('http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw')
url
## Attempt 2: import using the effective URL instead of the original one.
x <- import.bw(url, as = 'RleList')
traceback()
## url() by default uses 'libcurl', which is what download.file() uses by
## default too, and that does not work with this URL.
## The code at
## https://github.com/Bioconductor-mirror/rtracklayer/blob/master/R/bigWig.R#L12-L16
## only supports paths, not connections.
## Attempt 3: wrap the effective URL in a BigWigFile object before importing.
bw <- BigWigFile(url)
x <- import.bw(bw, as = 'RleList')
traceback()
## Attempt 4: changing the scheme from https to http doesn't solve this either.
## NOTE(review): this templink URL is hard-coded rather than derived from
## 'url', and presumably is temporary -- confirm it is still valid before
## drawing conclusions from this attempt.
url_http <- 'http://content-na.drive.amazonaws.com/cdproxy/templink/i_aQAPZJkJ9d9lN1NO5DJJtlbpvAdgbNuc1SkqSTHFouFiZq5'
x <- import.bw(url_http, as = 'RleList')
traceback()
## Attempt 5: same http URL, wrapped in a BigWigFile object.
bw2 <- BigWigFile(url_http)
x <- import.bw(bw2, as = 'RleList')
traceback()
## Note that the original file can be downloaded using method = 'curl';
## extra = '-L' is passed on to the curl command line so that it follows
## redirects.
download.file('http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw',
destfile = 'DRR000897.bw', method = 'curl', extra = '-L')
## While we can now import the data from the local copy, we want to avoid
## downloading the whole file first, especially when using the 'selection'
## argument.
y <- import.bw('DRR000897.bw', as = 'RleList')
y
## Reproducibility info: timestamp, console width and session details.
message(Sys.time())
options(width = 120)
devtools::session_info()
> library('rtracklayer')
Loading required package: GenomicRanges
Loading required package: BiocGenerics
Loading required package: parallel
Attaching package: ‘BiocGenerics’
The following objects are masked from ‘package:parallel’:
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
clusterExport, clusterMap, parApply, parCapply, parLapply,
parLapplyLB, parRapply, parSapply, parSapplyLB
The following objects are masked from ‘package:stats’:
IQR, mad, xtabs
The following objects are masked from ‘package:base’:
anyDuplicated, append, as.data.frame, cbind, colnames, do.call,
duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect,
is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
Reduce, rownames, sapply, setdiff, sort, table, tapply, union,
unique, unsplit
Loading required package: S4Vectors
Loading required package: stats4
Attaching package: ‘S4Vectors’
The following objects are masked from ‘package:base’:
colMeans, colSums, expand.grid, rowMeans, rowSums
Loading required package: IRanges
Loading required package: GenomeInfoDb
>
> ## Try importing bigwig file http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw
> x <- import.bw('http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw',
+ as = 'RleList')
traceback()
Error in seqinfo(ranges) : UCSC library operation failed
In addition: Warning message:
In seqinfo(ranges) :
Couldn't open http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw
> traceback()
13: .Call(BWGFile_seqlengths, path.expand(path(x)))
12: seqinfo(ranges)
11: seqinfo(ranges)
10: BigWigSelection(which, ...)
9: .class1(object)
8: as(selection, "BigWigSelection")
7: .local(con, format, text, ...)
6: import(FileForFormat(con, format), ...)
5: import(FileForFormat(con, format), ...)
4: import(con, "BigWig", ...)
3: import(con, "BigWig", ...)
2: import.bw("http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw",
as = "RleList")
1: import.bw("http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw",
as = "RleList")
>
> ## Find the effective url using http://stackoverflow.com/a/34383991
> unshorten_url <- function(uri) {
+ require('RCurl')
+ opts <- list(
+ followlocation = TRUE, # resolve redirects
+ ssl.verifyhost = FALSE, # suppress certain SSL errors
+ ssl.verifypeer = FALSE,
+ nobody = TRUE, # perform HEAD request
+ verbose = FALSE
+ )
+ curlhandle <- getCurlHandle(.opts = opts)
+ getURL(uri, curl = curlhandle)
+ info <- getCurlInfo(curlhandle)
+ rm(curlhandle) # release the curlhandle!
+ info$effective.url
+ }
> url <- unshorten_url('http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw')
Loading required package: RCurl
Loading required package: bitops
> url
[1] "https://content-na.drive.amazonaws.com/cdproxy/templink/usTQCr2pAaI3tTps4AFQuz1H9kmm23EDYy39SQ3ke5EuFiZq5"
>
> ## Try now with the effective url
> x <- import.bw(url, as = 'RleList')
Error in seqinfo(ranges) : UCSC library operation failed
In addition: Warning message:
In seqinfo(ranges) :
No openssl available in netConnectHttps for content-na.drive.amazonaws.com : 443
> traceback()
13: .Call(BWGFile_seqlengths, path.expand(path(x)))
12: seqinfo(ranges)
11: seqinfo(ranges)
10: BigWigSelection(which, ...)
9: .class1(object)
8: as(selection, "BigWigSelection")
7: .local(con, format, text, ...)
6: import(FileForFormat(con, format), ...)
5: import(FileForFormat(con, format), ...)
4: import(con, "BigWig", ...)
3: import(con, "BigWig", ...)
2: import.bw(url, as = "RleList")
1: import.bw(url, as = "RleList")
>
> ## url() by default uses 'libcurl', which is what download.file() uses by
> ## default too and does not work with this url.
>
> ## https://github.com/Bioconductor-mirror/rtracklayer/blob/master/R/bigWig.R#L12-L16
> ## only supports paths, not connections
> bw <- BigWigFile(url)
> x <- import.bw(bw, as = 'RleList')
Error in seqinfo(ranges) : UCSC library operation failed
In addition: Warning message:
In seqinfo(ranges) :
No openssl available in netConnectHttps for content-na.drive.amazonaws.com : 443
> traceback()
11: .Call(BWGFile_seqlengths, path.expand(path(x)))
10: seqinfo(ranges)
9: seqinfo(ranges)
8: BigWigSelection(which, ...)
7: .class1(object)
6: as(selection, "BigWigSelection")
5: .local(con, format, text, ...)
4: import(con, "BigWig", ...)
3: import(con, "BigWig", ...)
2: import.bw(bw, as = "RleList")
1: import.bw(bw, as = "RleList")
>
> ## Changing from https to http doesn't solve this either
> url_http <- 'http://content-na.drive.amazonaws.com/cdproxy/templink/i_aQAPZJkJ9d9lN1NO5DJJtlbpvAdgbNuc1SkqSTHFouFiZq5'
> x <- import.bw(url_http, as = 'RleList')
Error in seqinfo(ranges) : UCSC library operation failed
In addition: Warning messages:
1: In seqinfo(ranges) :
TCP non-blocking connect() to content-na.drive.amazonaws.com timed-out in select() after 10000 milliseconds - Cancelling!
2: In seqinfo(ranges) :
Couldn't open http://content-na.drive.amazonaws.com/cdproxy/templink/i_aQAPZJkJ9d9lN1NO5DJJtlbpvAdgbNuc1SkqSTHFouFiZq5
> traceback()
13: .Call(BWGFile_seqlengths, path.expand(path(x)))
12: seqinfo(ranges)
11: seqinfo(ranges)
10: BigWigSelection(which, ...)
9: .class1(object)
8: as(selection, "BigWigSelection")
7: .local(con, format, text, ...)
6: import(FileForFormat(con, format), ...)
5: import(FileForFormat(con, format), ...)
4: import(con, "BigWig", ...)
3: import(con, "BigWig", ...)
2: import.bw(url_http, as = "RleList")
1: import.bw(url_http, as = "RleList")
>
> bw2 <- BigWigFile(url_http)
> x <- import.bw(bw2, as = 'RleList')
Error in seqinfo(ranges) : UCSC library operation failed
In addition: Warning messages:
1: In seqinfo(ranges) :
TCP non-blocking connect() to content-na.drive.amazonaws.com timed-out in select() after 10000 milliseconds - Cancelling!
2: In seqinfo(ranges) :
Couldn't open http://content-na.drive.amazonaws.com/cdproxy/templink/i_aQAPZJkJ9d9lN1NO5DJJtlbpvAdgbNuc1SkqSTHFouFiZq5
> traceback()
11: .Call(BWGFile_seqlengths, path.expand(path(x)))
10: seqinfo(ranges)
9: seqinfo(ranges)
8: BigWigSelection(which, ...)
7: .class1(object)
6: as(selection, "BigWigSelection")
5: .local(con, format, text, ...)
4: import(con, "BigWig", ...)
3: import(con, "BigWig", ...)
2: import.bw(bw2, as = "RleList")
1: import.bw(bw2, as = "RleList")
>
> ## Note the original file can be downloaded using method = 'curl'
> download.file('http://duffel.rail.bio/recount/DRP000366/bw/DRR000897.bw',
+ destfile = 'DRR000897.bw', method = 'curl', extra = '-L')
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 417 100 417 0 0 122 0 0:00:03 0:00:03 --:--:-- 122
100 50.0M 100 50.0M 0 0 5205k 0 0:00:09 0:00:09 --:--:-- 9363k
>
> ## While we can now import the data, we want to avoid doing this. Specially when
> ## using the 'selection' argument.
> y <- import.bw('DRR000897.bw', as = 'RleList')
> y
RleList of length 94
$chr1
numeric-Rle of length 248956422 with 1249470 runs
Lengths: 13378 37 366 37 25 ... 535 37 1028 37 13595
Values : 0 1 0 1 0 ... 0 1 0 1 0
$chr10
numeric-Rle of length 133797422 with 412887 runs
Lengths: 47132 37 12 37 333 ... 211 37 2082 37 11003
Values : 0 1 0 1 0 ... 0 1 0 1 0
$chr11
numeric-Rle of length 135086622 with 700940 runs
Lengths: 74243 37 22178 37 22907 ... 37 4395 37 17340
Values : 0 1 0 1 0 ... 1 0 1 0
$chr11_KI270721v1_random
numeric-Rle of length 100316 with 363 runs
Lengths: 2593 37 17 7 1 ... 876 37 492 37 47993
Values : 0 1 0 1 2 ... 0 1 0 1 0
$chr12
numeric-Rle of length 133275309 with 628862 runs
Lengths: 10055 37 4101 37 316 ... 623 37 4860 37 64715
Values : 0 1 0 1 0 ... 0 1 0 1 0
...
<89 more elements>
>
>
> ## Reproducibility info
> message(Sys.time())
2016-05-05 16:48:30
> options(width = 120)
> devtools::session_info()
Session info -----------------------------------------------------------------------------------------------------------
setting value
version R version 3.3.0 RC (2016-05-01 r70572)
system x86_64, darwin13.4.0
ui X11
language (EN)
collate en_US.UTF-8
tz America/New_York
date 2016-05-05
Packages ---------------------------------------------------------------------------------------------------------------
package * version date source
Biobase 2.32.0 2016-05-04 Bioconductor
BiocGenerics * 0.18.0 2016-05-04 Bioconductor
BiocParallel 1.6.0 2016-05-04 Bioconductor
Biostrings 2.40.0 2016-05-04 Bioconductor
bitops * 1.0-6 2013-08-17 CRAN (R 3.3.0)
colorout * 1.1-2 2016-05-05 Github (jalvesaq/colorout@6538970)
devtools 1.11.1 2016-04-21 CRAN (R 3.3.0)
digest 0.6.9 2016-01-08 CRAN (R 3.3.0)
GenomeInfoDb * 1.8.0 2016-05-04 Bioconductor
GenomicAlignments 1.8.0 2016-05-04 Bioconductor
GenomicRanges * 1.24.0 2016-05-04 Bioconductor
IRanges * 2.6.0 2016-05-04 Bioconductor
memoise 1.0.0 2016-01-29 CRAN (R 3.3.0)
RCurl * 1.95-4.8 2016-03-01 CRAN (R 3.3.0)
Rsamtools 1.24.0 2016-05-04 Bioconductor
rtracklayer * 1.32.0 2016-05-04 Bioconductor
S4Vectors * 0.10.0 2016-05-04 Bioconductor
SummarizedExperiment 1.2.0 2016-05-04 Bioconductor
withr 1.0.1 2016-02-04 CRAN (R 3.3.0)
XML 3.98-1.4 2016-03-01 CRAN (R 3.3.0)
XVector 0.12.0 2016-05-04 Bioconductor
zlibbioc 1.18.0 2016-05-04 Bioconductor
>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment