Created
October 16, 2019 02:03
-
-
Save jwhazel/0eedd85133b60b8d02b754557614f55d to your computer and use it in GitHub Desktop.
KY utility outages scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* In-browser scraper for KY utility outages | |
* 1.) Goto: https://psc.ky.gov/ors/PublicInfo_OutageIncidents.aspx | |
* 2.) Type: all, Other: other, From: 01/01/1999, To: today | |
* 3.) Copy this code into console and run | |
* 4.) Copy output into something like http://www.convertcsv.com/json-to-csv.htm | |
*/ | |
(function() { | |
let output = []; | |
//Select all <tr> elements in the table of results | |
let table = document.querySelectorAll("#dgIncidents > tbody > tr"); | |
//Loop through each result (start at 1 since 0 is the table header) | |
for (var i = 1; i < table.length; i++) { | |
let d = table[i].childNodes; | |
//Create the object we plan to use | |
let row = { | |
"Incident Date": formatTime(d[1].innerText.trim()), | |
Updated: "", | |
Company: d[2].innerText.trim(), | |
"Possible Cause": "", | |
Comment: "", | |
"County Count": 0, | |
Counties: [], | |
"Total Customers Out": [] | |
}; | |
//Get the possible cause if it exists | |
if (d[3].children[0].innerText.trim().toLowerCase() === "possible cause:") { | |
row["Possible Cause"] = d[3].children[1].innerText.trim(); | |
} | |
//Get the comment if it exists | |
if (d[3].children[3].innerText.trim().toLowerCase() === "comment:") { | |
row["Comment"] = d[3].children[4].innerText.trim(); | |
} | |
//Get the modified time if it exists | |
if (typeof d[3].children[5] !== "undefined") { | |
let u = d[3].children[5].innerText | |
.toLowerCase() | |
.replace("reported", "") | |
.replace("updated", "") | |
.replace("corrected", "") | |
.trim(); | |
row["Updated"] = formatTime(u); | |
} | |
//Handle the tables in the last column | |
let innerTable = d[4].children[0].children[0].children; | |
for (var y = 1; y < innerTable.length; y++) { | |
row["Counties"].push(innerTable[y].children[0].innerText.trim()); | |
row["Total Customers Out"].push( | |
Number(innerTable[y].children[1].innerText.replace(",", "").trim()) | |
); | |
} | |
//Convert the information scraped in the tables in the last column | |
row["County Count"] = row["Counties"].length; | |
row["Counties"] = row["Counties"].join(); | |
row["Total Customers Out"] = row["Total Customers Out"].reduce( | |
(a, b) => a + b, | |
0 | |
); | |
//Add this to our object | |
output.push(row); | |
} | |
//Output the JSON as a string | |
console.log(JSON.stringify(output)); | |
//Return yyyy-MM-dd HH:mm:ss to satisfy excel | |
function formatTime(str) { | |
let d = new Date(str); | |
return ( | |
d.getFullYear().toString() + | |
"-" + | |
((d.getMonth() + 1).toString().length == 2 | |
? (d.getMonth() + 1).toString() | |
: "0" + (d.getMonth() + 1).toString()) + | |
"-" + | |
(d.getDate().toString().length == 2 | |
? d.getDate().toString() | |
: "0" + d.getDate().toString()) + | |
" " + | |
(d.getHours().toString().length == 2 | |
? d.getHours().toString() | |
: "0" + d.getHours().toString()) + | |
":" + | |
((parseInt(d.getMinutes() / 5) * 5).toString().length == 2 | |
? (parseInt(d.getMinutes() / 5) * 5).toString() | |
: "0" + (parseInt(d.getMinutes() / 5) * 5).toString()) + | |
":00" | |
); | |
} | |
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment