Original code (RCurl):
# Fetch only the HTTP response headers of each URL with RCurl and tabulate
# the Last-Modified timestamp and Content-Length that each server reports.
url <- c("https://www.r-project.org/", "http://bioconductor.org/")

# Return the first capture group of `pattern` in `text`, or NA when the
# pattern does not match. (A bare sub() returns its whole input on a failed
# match, which would put the raw header text into the results.) Matching
# ignores case because HTTP header names are case-insensitive and HTTP/2
# servers send them in lowercase.
header_field <- function(pattern, text) {
  hit <- regmatches(text, regexec(pattern, text, ignore.case = TRUE))[[1L]]
  if (length(hit) >= 2L) hit[[2L]] else NA_character_
}

result <- lapply(url, function(f) {
  # TODO: Consider if we want this in the new function
  # message(basename(f))
  h <- RCurl::basicTextGatherer()
  # nobody = TRUE sets libcurl's NOBODY option (no response body is
  # downloaded); the header lines are collected through h$update.
  RCurl::curlPerform(url = f, nobody = TRUE, headerfunction = h$update)
  yy <- h$value()
  list(
    date = header_field(".*Last-Modified: ([[:print:]]+) GMT.*", yy),
    size = header_field(".*Content-Length: ([[:digit:]]+).*", yy)
  )
})

# vapply() pins one string per URL, so a malformed entry fails loudly instead
# of silently changing the shape of the result. A missing header becomes NA.
# NOTE: %a and %b are parsed using the current LC_TIME locale.
date <- strptime(
  vapply(result, "[[", character(1), "date"),
  "%a, %d %b %Y %H:%M:%S",
  tz = "GMT"
)
size <- as.integer(vapply(result, "[[", character(1), "size"))
data.frame(url, date, size, stringsAsFactors = FALSE)
Result:
url date size
1 https://www.r-project.org/ 2015-08-14 09:10:02 4835
2 http://bioconductor.org/ 2015-11-05 21:00:29 15738
New code (httr):
# Issue an HTTP HEAD request per URL with httr and tabulate the
# Last-Modified timestamp and Content-Length that each server reports.
url <- c("https://www.r-project.org/", "http://bioconductor.org/")

result <- lapply(url, function(innerUrl) {
  hdrs <- httr::headers(httr::HEAD(innerUrl))
  # `[[` yields NULL for an absent header. Store NA instead so every URL
  # contributes exactly one date and one size; dropping NULLs would shorten
  # the vectors and let data.frame() recycle values onto the wrong rows.
  pick <- function(name) {
    value <- hdrs[[name]]
    if (is.null(value)) NA_character_ else value
  }
  list(date = pick("last-modified"), size = pick("content-length"))
})

# vapply() guarantees one string per URL, keeping the columns aligned with url.
# NOTE: %a and %b are parsed using the current LC_TIME locale.
date <- vapply(result, "[[", character(1), "date")
date <- strptime(date, "%a, %d %b %Y %H:%M:%S", tz = "GMT")
size <- as.integer(vapply(result, "[[", character(1), "size"))
data.frame(url, date, size, stringsAsFactors = FALSE)
Result:
url date size
1 https://www.r-project.org/ 2015-08-14 09:10:02 1820
2 http://bioconductor.org/ 2015-11-05 21:00:29 4246