Skip to content

Instantly share code, notes, and snippets.

@srvanderplas
Created April 26, 2019 15:50
Show Gist options
  • Save srvanderplas/3039f0c664fca0a4427c18889cccf512 to your computer and use it in GitHub Desktop.
Save srvanderplas/3039f0c664fca0a4427c18889cccf512 to your computer and use it in GitHub Desktop.
Scrape Power Pool Data (from 2014)
#!/usr/bin/Rscript
# Scrape Southwest Power Pool LMP/SMP/MCC
library(scrapeR)
library(stringr)
library(lubridate)
library(ggplot2)
library(RMySQL)
library(plyr)
library(reshape2)
con <- dbConnect(MySQL(), user="susan", dbname="susan")
table <- xmlToDataFrame("http://www.spp.org/XML/LIP-Pricing.xml", stringsAsFactors=F)
d2 <- dbReadTable(con, "SWPower")
d2 <- unique(rbind(table, d2))
d <- dbWriteTable(con, "SWPower", d2, append=FALSE, overwrite=TRUE)
dbDisconnect(con)
# Scrape MISO Pool LMP/MCC/MLC
con <- dbConnect(MySQL(), user="susan", dbname="susan")
tmp <- scrape("https://www.misoenergy.org/ria/Consolidated.aspx?format=xml")[[1]]
tmp2 <- getNodeSet(tmp, "//*/lmpdata")[[1]]
date <- xmlAttrs(getNodeSet(tmp, "//*/lmpdata")[[1]])[["refid"]]
table <- ldply(xmlChildren(xmlChildren(tmp2)[[1]]), function(i) as.data.frame(t(xmlAttrs(i)), stringsAsFactors=F))
table$.id <- date
names(table)[1] <- "date"
d2 <- dbReadTable(con, "MisoPower")
d2 <- unique(rbind(table, d2))
d <- dbWriteTable(con, "MisoPower", d2, append=FALSE, overwrite=TRUE)
dbDisconnect(con)
# Scrape CAISO (california) LMP
date <- Sys.time()
tz(date) <- "America/Los_Angeles"
hour(date) <- hour(date)+2
date <- floor_date(date, "minute")
tmp <- scrape("http://oasis.caiso.com/mrtu-oasis/lmp/RTM/POINTMap.html")[[1]]
tmp2 <- getNodeSet(tmp, "/html/body/div[1]/div/map")
table <- ldply(xmlChildren(tmp2[[1]]), function(i){
x <- as.data.frame(t(xmlAttrs(i)))
if(grepl("County:", x$alt)) return(data.frame())
x$Node <- gsub("Node: ", "", word(x$alt, sep="\\r"))
# x$NodeType <- gsub("Node Type: ", "", word(x$alt, 2, sep="\\r\\n"))
x$MCP <- gsub("MARGINALCLEARINGPRICE: ", "", word(x$alt, 3, sep="\\r\\n"))
x$CONGESTLMP <- gsub("CONGESTLMP: ", "", word(x$alt, 5, sep="\\r\\n"))
x$COSTLMP <- gsub("COSTLMP: ", "", word(x$alt, 6, sep="\\r\\n"))
x$LOSSLMP <- gsub("LOSSLMP: ", "", word(x$alt, 7, sep="\\r\\n"))
return(data.frame(date=date, node=x$Node, mcp=x$MCP, congestlmp=x$CONGESTLMP, costlmp=x$COSTLMP, losslmp=x$LOSSLMP, stringsAsFactors=FALSE))
} )
table <- table[,-1]
con <- dbConnect(MySQL(), user="susan", dbname="susan")
d2 <- dbReadTable(con, "CaisoPower")
d2 <- unique(rbind(table, d2))
d <- dbWriteTable(con, "CaisoPower", d2, append=FALSE, overwrite=TRUE)
dbDisconnect(con)
#Scrape ERCOT - Texas power pool
con <- dbConnect(MySQL(), user="susan", dbname="susan")
tmp <- scrape("http://www.ercot.com/content/cdr/contours/rtmLmpHg.html")[[1]]
tmp2 <- getNodeSet(tmp, "//*/map")[[1]]
date <- gsub("Last Updated: ", "", xmlValue(getNodeSet(tmp2, "//*[@class='datestamp']")[[1]]))
table <- ldply(xmlChildren(tmp2), function(i){
x <- xmlAttrs(i)[["title"]]
data.frame(date=date, location=word(x, sep=":"), lmp=as.numeric(word(x, 2, sep="\\$")), stringsAsFactors=FALSE)
})
table <- table[,-1]
d2 <- dbReadTable(con, "ErcotPower")
d2 <- unique(rbind(table, d2))
d <- dbWriteTable(con, "ErcotPower", d2, append=FALSE, overwrite=TRUE)
dbDisconnect(con)
# Scrape NEISO - Northeast Power pool
con <- dbConnect(MySQL(), user="susan", dbname="susan")
url <- "http://www.iso-ne.com/markets/5min_data/fiveMinLMP.do?submit=latest"
data <- read.csv(url, skip=5, header=FALSE, stringsAsFactors=FALSE)
header <- read.csv(url, skip=2, nrows=1, header=FALSE, stringsAsFactors=FALSE)
names(data) <- header
ids <- read.csv("http://www.iso-ne.com/histRpts/rt-lmp/lmp_rt_final_20140205.csv", skip=5, stringsAsFactors=FALSE)
ids <- unique(ids[,4:6])
names(ids) <- c("ID", "NodeName", "NodeType")
data <- merge(data, ids, by.x="Location ID", by.y="ID")
names(data) <- c("ID", "isData", "DateTime", "Energy", "Congestion", "Loss", "LMP", "NodeName", "NodeType")
data <- data[,-2]
d2 <- dbReadTable(con, "NEisoPower")
d2 <- unique(rbind(data, d2))
d <- dbWriteTable(con, "NEisoPower", d2, append=FALSE, overwrite=TRUE)
dbDisconnect(con)
#PJM (Pennsylvania Jersey etc)
url <- "http://www.pjm.com/pub/account/lmpgen/lmppost.html"
tmp <- scrape(url)[[1]]
date <- xmlValue(getNodeSet(tmp, "/html/body/center[2]/table/tr/td/font/b")[[1]])
table1 <- xmlToDataFrame(getNodeSet(tmp, "/html/body/center[4]/table")[[1]], stringsAsFactors=FALSE)
table1 <- do.call("cbind", lapply(1:ncol(table1), function(k) str_trim(table1[,k])))
table1 <- as.data.frame(table1, stringsAsFactors=FALSE)
names(table1) <- table1[1,]
table1 <- table1[-1,]
table1 <- table1[,-which(apply(table1, 2, function(i) sum(nchar(i)!=0))==0)]
names(table1) <- c("NodeName", "NodeType", "LMP", "HourlyLMPHr22")
table1$LMP <- as.numeric(table1$LMP)
table1$HourlyLMPHr22 <- as.numeric(table1$HourlyLMPHr22)
table2 <- xmlToDataFrame(getNodeSet(tmp, "/html/body/center[6]/table")[[1]], stringsAsFactors=FALSE)
table2 <- do.call("cbind", lapply(1:ncol(table2), function(k) str_trim(table2[,k])))
table2 <- as.data.frame(table2, stringsAsFactors=FALSE)
names(table2) <- table2[1,]
table2 <- table2[-1,]
table2 <- table2[,-which(apply(table2, 2, function(i) sum(nchar(i)!=0))==0)]
table2$NodeType <- "Bus"
names(table2) <- c("NodeName", "LMP", "HourlyLMPHr22", "NodeType")
table2$LMP <- as.numeric(table2$LMP)
table2$HourlyLMPHr22 <- as.numeric(table2$HourlyLMPHr22)
table <- rbind.fill(table1, table2)
table$date <- date
con <- dbConnect(MySQL(), user="susan", dbname="susan")
d2 <- dbReadTable(con, "PjmPower")
d2 <- unique(rbind(table, d2))
d <- dbWriteTable(con, "PjmPower", d2, append=FALSE, overwrite=TRUE)
dbDisconnect(con)
# NYISO LMP
url <- "http://mis.nyiso.com/public/realtime/realtime_gen_lbmp.csv"
data <- read.csv(url, stringsAsFactors=FALSE)
names(data) <- c("DateTime", "Name", "ID", "LBMP", "MarginalLosses", "MarginalCongestion")
data$Type <- "Generator"
url2 <- "http://mis.nyiso.com/public/realtime/realtime_zone_lbmp.csv"
data2 <- read.csv(url2, stringsAsFactors=FALSE)
names(data2) <- c("DateTime", "Name", "ID", "LBMP", "MarginalLosses", "MarginalCongestion")
data2$Type <- "Zone"
data <- rbind.fill(data, data2)
con <- dbConnect(MySQL(), user="susan", dbname="susan")
d2 <- dbReadTable(con, "NyisoPower")
d2 <- unique(rbind(data, d2))
d <- dbWriteTable(con, "NyisoPower", d2, append=FALSE, overwrite=TRUE)
dbDisconnect(con)
#Natural Gas, Oil, Gas, etc. prices
url <- "http://www.bloomberg.com/energy/"
date <- Sys.Date()
tmp <- scrape(url)[[1]]
tmp2 <- getNodeSet(tmp, "//*/table")
data <- ldply(tmp2, function(i){
nrow <- length(xmlChildren(i))-1
header <- str_trim(unlist(lapply(xmlChildren(xmlChildren(i)[[1]]), xmlValue)))
header <- header[which(nchar(header)>0)]
x <- ldply(xmlChildren(i)[2:nrow], function(j){
header <- str_trim(unlist(lapply(xmlChildren(j), xmlValue)))
header <- header[which(nchar(header)>0)]
return(header)
})
x <- x[,-1]
names(x) <- header
x[which(apply(x, 1, function(i) sum(i==names(x))==0)),]
})
data$date <- date
con <- dbConnect(MySQL(), user="susan", dbname="susan")
d2 <- dbReadTable(con, "EnergyMarkets")
names(data) <- names(d2)
d2 <- unique(rbind(data, d2))
d <- dbWriteTable(con, "EnergyMarkets", d2, append=FALSE, overwrite=TRUE)
dbDisconnect(con)
#Metal future prices
url <- "http://www.bloomberg.com/markets/commodities/futures/metals/"
date <- Sys.Date()
tmp <- scrape(url)[[1]]
tmp2 <- getNodeSet(tmp, "//*/div[@class='std_data_container']")
data <- ldply(tmp2, function(i){
class.name <- xmlValue(xmlChildren(i)[["h2"]])
i <- xmlChildren(xmlChildren(i)[["div"]])[["table"]]
nrow <- length(xmlChildren(i))-1
header <- str_trim(unlist(lapply(xmlChildren(xmlChildren(i)[[1]]), xmlValue)))
header <- header[which(nchar(header)>0)]
x <- ldply(xmlChildren(i)[2:nrow], function(j){
header <- str_trim(unlist(lapply(xmlChildren(j), xmlValue)))
header <- header[which(nchar(header)>0)]
return(header)
})
x <- x[,-1]
names(x) <- header
x$class <- class.name
x[which(apply(x, 1, function(i) sum(i==names(x))==0)),]
})
data$date <- date
con <- dbConnect(MySQL(), user="susan", dbname="susan")
d2 <- dbReadTable(con, "MetalMarkets")
names(data) <- names(d2)
d2 <- unique(rbind(data, d2))
d <- dbWriteTable(con, "MetalMarkets", d2, append=FALSE, overwrite=TRUE)
dbDisconnect(con)
# Natural Gas Markets
url <- "http://www.naturalgasintel.com/ext/resources/Daily-GPI/ICE.htm"
tmp <- scrape(url)[[1]]
tmp2 <- getNodeSet(tmp, "/html/body/table")[[1]]
rowlist <- xmlChildren(tmp2)
date <- xmlValue(xmlChildren(rowlist[[1]])[[1]])
header <- str_trim(sapply(xmlChildren(rowlist[[2]]), xmlValue))
header <- header[which(nchar(header)>0)]
header <- gsub("#", "NUM", header, fixed=TRUE)
data <- ldply(rowlist[-c(1:2)], function(i) {
x <- str_trim(unlist(sapply(xmlChildren(i), xmlValue)))
x <- x[which(nchar(x)>0)]
})
data <- data[,-1]
names(data) <- header
data$TradeDate <- gsub("Trade Date: ", "", word(date, sep=";", start=1))
data$FlowDate <- str_replace(word(date, sep=";", start=-1), " Flow Date\\(s\\): ", "")
con <- dbConnect(MySQL(), user="susan", dbname="susan")
d2 <- dbReadTable(con, "NatGasMarket")
names(data) <- names(d2)
d2 <- unique(rbind(data, d2))
d <- dbWriteTable(con, "NatGasMarket", d2, append=FALSE, overwrite=TRUE)
dbDisconnect(con)
# Natural Gas Reserves
url <- "http://www.eia.gov/oil_gas/natural_gas/data_publications/eia914/eia914.html"
tmp <- scrape(url)[[1]]
tmp2 <- getNodeSet(tmp, "/html/body/table[4]/tr/td/table[5]")
head1 <- str_trim(sapply(xmlChildren(xmlChildren(tmp2[[1]])[[1]]), xmlValue))
head1 <- head1[which(nchar(head1)>0)]
head1 <- c(head1[1], rep(head1[-1], each=2))
data <- ldply(xmlChildren(tmp2[[1]])[-c(1:2)], function(i){
x <- str_trim(sapply(xmlChildren(i), xmlValue))
x <- x[which(nchar(x)>0)]
return(x)
})
data1 <- data[,-1]
names(data1) <- head1
data <- data1[,c(1, 2, 4, 6, 8, 10)]
table <- melt(data, id.vars=1, measure.vars=2:6)
names(table) <- c("Date", "Area", "Value")
table$Area <- as.character(table$Area)
tmp2 <- getNodeSet(tmp, "/html/body/table[4]/tr/td/table[6]")
head1 <- str_trim(sapply(xmlChildren(xmlChildren(tmp2[[1]])[[1]]), xmlValue))
head1 <- head1[which(nchar(head1)>0)]
head1 <- c(head1[1], rep(head1[-1], each=2))
data <- ldply(xmlChildren(tmp2[[1]])[-c(1:2)], function(i){
x <- str_trim(sapply(xmlChildren(i), xmlValue))
x <- x[which(nchar(x)>0)]
return(x)
})
data1 <- data[,-1]
names(data1) <- head1
data <- data1[,c(1, 2, 4, 6, 8, 10)]
data <- melt(data, id.vars=1, measure.vars=2:6)
names(data) <- c("Date", "Area", "Value")
data$Area <- as.character(data$Area)
table <- rbind(table, data)
table$Value <- as.numeric(str_sub(table$Value, 3, -1))
con <- dbConnect(MySQL(), user="susan", dbname="susan")
d2 <- dbReadTable(con, "NatGasReserves")
names(table) <- names(d2)
d2 <- unique(rbind(table, d2))
d <- dbWriteTable(con, "NatGasReserves", d2, append=FALSE, overwrite=TRUE)
dbDisconnect(con)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment