Skip to content

Instantly share code, notes, and snippets.

@Ray901
Created May 15, 2016 08:19
Show Gist options
  • Save Ray901/0f44d4f7e1755aca0517d3c764865754 to your computer and use it in GitHub Desktop.
Save Ray901/0f44d4f7e1755aca0517d3c764865754 to your computer and use it in GitHub Desktop.
# Script preamble: load the scraping packages and define the crawl root.
#
# NOTE: the former `rm(list = ls())` was removed on purpose. Nothing below
# relies on an empty workspace, and clearing it would silently destroy the
# caller's objects whenever this file is sourced.
library(curl)  # NOTE(review): not called directly here; presumably needed as the download backend
library(rvest) # read_html(), html_nodes(), html_attr() and the %>% pipe

# Root of the site to crawl. The trailing slash matters: link paths are
# appended to it with paste0(), without inserting a separator.
setUrl <- "http://tisvcloud.freeway.gov.tw/"
#######################################################
#' List the links of a directory-index page as absolute URLs.
#'
#' Reads the page at `seturl`, collects the `href` attribute of every `<a>`
#' element that is a direct child of a `<td>`, drops links containing
#' "/history/TDCS/" as well as repeated links, and prefixes the remaining
#' hrefs with `seturl`.
#'
#' @param seturl Character scalar: URL of the page to read. It is used
#'   verbatim as the prefix of every returned link, so it should end with "/".
#' @return Character vector of URLs (`seturl` followed by each kept href);
#'   `character(0)` when the page has no usable links.
gethtmlTableUrl <- function(seturl) {
  page <- read_html(seturl)
  anchors <- html_nodes(page, xpath = "//td/a")
  hrefs <- html_attr(anchors, "href")

  # Anchors without an href yield NA, which would otherwise survive the
  # filter below and end up as "<seturl>NA".
  hrefs <- hrefs[!is.na(hrefs)]

  keep <- !grepl("/history/TDCS/", hrefs) & !duplicated(hrefs)
  hrefs <- hrefs[keep]

  # paste0() recycles a zero-length vector to "", so without this guard an
  # empty listing would be reported as the single URL `seturl` itself.
  if (length(hrefs) == 0) {
    return(character(0))
  }
  paste0(seturl, hrefs)
}
#######################################################
# Crawl the site root, then every dataset directory whose link contains
# "TDCS", and collect all file URLs into `totalUrl`.

# Prefix of the sub-directory names to descend into (previously the
# hard-coded pattern "^2016").
setYear <- "2016"

# Join a base URL with relative paths. paste0() recycles a zero-length
# vector to "", which would turn "no matches" into the bare base URL, so
# an empty input is answered with an empty result instead.
prependBase <- function(base, paths) {
  if (length(paths) == 0) {
    return(character(0))
  }
  paste0(base, paths)
}

# A list (not NULL): `[[<-` on NULL yields an atomic vector when the first
# stored value has length 1, and later, longer assignments would then fail.
urlList <- list()

etcTableData <- setUrl %>%
  read_html() %>%
  html_nodes(xpath = "//td/a") %>%
  html_attr("href")
etcTableData <- etcTableData[!is.na(etcTableData)]
# setUrl already ends in "/", so strip the leading slash of "/history..."
etcTableData <- gsub("/history", "history", etcTableData)

# fixed = TRUE: the dots in the file extensions are literal, not regex
# wildcards.
etcfileUrl <- prependBase(
  setUrl,
  etcTableData[grep("xml.gz", etcTableData, fixed = TRUE)]
)
etcTableUrl <- prependBase(setUrl, etcTableData[grep("TDCS", etcTableData)])

# seq_along() keeps the loop from running (with indices 1 and 0) when no
# dataset directory was found.
for (i in seq_along(etcTableUrl)) {
  # Every reference below uses element i; the original indexed [1]
  # throughout and therefore crawled only the first directory, repeatedly.
  setFieldName <- gsub(
    "/", "",
    gsub(setUrl, "", etcTableUrl[i], fixed = TRUE),
    fixed = TRUE
  )

  etcTableDayData <- etcTableUrl[i] %>%
    read_html() %>%
    html_nodes(xpath = "//td/a") %>%
    html_attr("href")
  etcTableDayData <- etcTableDayData[!is.na(etcTableDayData)]
  etcTableDayData <- etcTableDayData[
    !grepl("/history/TDCS/", etcTableDayData) & !duplicated(etcTableDayData)
  ]

  # Archives linked directly from the dataset page.
  etcTableDayDataUrl <- prependBase(
    etcTableUrl[i],
    etcTableDayData[grep(".tar.gz", etcTableDayData, fixed = TRUE)]
  )

  # Entries starting with the year prefix are treated as sub-directories
  # and crawled two levels deep with gethtmlTableUrl().
  etcTableDayDir <- etcTableDayData[
    grep(paste0("^", setYear), etcTableDayData)
  ]
  etcTableDayDirUrl <- prependBase(etcTableUrl[i], etcTableDayDir)
  etcTableHourDirUrl <- unlist(lapply(etcTableDayDirUrl, gethtmlTableUrl))
  etcTableHourDataUrl <- unlist(lapply(etcTableHourDirUrl, gethtmlTableUrl))

  urlList[[setFieldName]] <- c(etcTableDayDataUrl, etcTableHourDataUrl)
}

totalUrl <- c(etcfileUrl, unlist(urlList))

# Downloading is left disabled. To fetch everything, use something like the
# loop below (destfile derived from the URL here is only an example; files
# with the same basename would overwrite each other):
# for (i in seq_along(totalUrl)) {
#   download.file(url = totalUrl[i],
#                 destfile = basename(totalUrl[i]),
#                 mode = "wb")
#   Sys.sleep(1)
# }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment