Skip to content

Instantly share code, notes, and snippets.

@CaptainJH
Created March 24, 2014 02:39
Show Gist options
  • Save CaptainJH/9733283 to your computer and use it in GitHub Desktop.
Save CaptainJH/9733283 to your computer and use it in GitHub Desktop.
Get content from an HTML page
library(RCurl)
library(stringr)
# Base URL; CheckDatFolder() appends a folder name and "/" to it.
RootURL <- "http://10.130.19.139/19659/"
# Initial (empty) collection of .dat file URLs; c() evaluates to NULL.
DatFileURLs <- c()
#' List the bill .dat file URLs found in one folder under RootURL.
#'
#' Downloads the page at `<RootURL><folder>/`, looks on every line for the
#' first file name of the form `bill-N-N-N-N.dat`, and turns each hit into a
#' full URL by prefixing the folder URL. Every URL is printed as it is found.
#'
#' @param folder Folder name appended to `RootURL`; a trailing "/" is added.
#' @return A character vector: the current value of `DatFileURLs` followed by
#'   the URLs found on the page. The global `DatFileURLs` is not modified.
#'   When nothing matches and `DatFileURLs` is empty, the result is
#'   `character(0)`.
CheckDatFolder <- function(folder) {
  URL <- sprintf("%s%s/", RootURL, folder)
  html <- getURL(URL)
  html_lines <- strsplit(html, "\n")[[1]]

  # regexpr() locates the first match on each line; regmatches() keeps only
  # the lines that matched, so no NA filtering or index loop is needed and an
  # empty page simply yields character(0).
  dat_pattern <- "bill-[0-9]+-[0-9]+-[0-9]+-[0-9]+\\.dat"
  dat_names <- regmatches(html_lines, regexpr(dat_pattern, html_lines))
  new_urls <- sprintf("%s%s", URL, dat_names)

  # Keep the per-file progress output of the loop this replaces.
  for (dat_url in new_urls) {
    print(dat_url)
  }

  c(DatFileURLs, new_urls)
}
#' Run CheckDatFolder() on the folder named after the current date.
#'
#' @return Whatever CheckDatFolder() returns for today's date string.
CheckDatFolderToday <- function() {
  # Sys.Date() rendered as text, i.e. "YYYY-MM-DD".
  folder_name <- format(Sys.Date())
  CheckDatFolder(folder_name)
}
#' Fetch the contents behind a .dat file URL.
#'
#' @param file URL passed straight to getURL().
#' @return The value returned by getURL() for `file`.
GetDatFileText <- function(file) {
  contents <- getURL(file)
  contents
}
#' Placeholder for parsing the text of a .dat file.
#'
#' @param text Currently unused.
#' @return Always NULL; no parsing is implemented yet.
ParseDatFile <- function(text) {
  # TODO: not implemented.
  NULL
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment