Skip to content

Instantly share code, notes, and snippets.

@default-writer
Forked from midnightfreddie/ParseTable.ps1
Created July 25, 2018 10:24
Show Gist options
  • Save default-writer/43dcdf17d459904c993893e83f5b709a to your computer and use it in GitHub Desktop.
Save default-writer/43dcdf17d459904c993893e83f5b709a to your computer and use it in GitHub Desktop.
# Fetch a web page and turn each HTML table on it into PowerShell objects,
# one object per data row, with property names taken from the header row.
#
# This variant walks the ParsedHtml DOM returned by Invoke-WebRequest. That DOM
# is only populated by Windows PowerShell when -UseBasicParsing is NOT used;
# the guard below fails early with a clear message when it is missing.
$Uri = "http://midnightfreddie.com/reddit/simpletable.html"
$InfoPage = Invoke-Webrequest -Uri $Uri
if ($null -eq $InfoPage.ParsedHtml) {
    throw "No ParsedHtml DOM available for $Uri. Run this under Windows PowerShell without -UseBasicParsing."
}
# Iterate over each <tbody>, which contains all the body rows of one table
$InfoPage.ParsedHtml.getElementsByTagName("tbody") | ForEach-Object {
    $Headers = $null
    # Might need to uncomment the following line depending on the table being parsed.
    # If there is more than one table, a way to pick the right headers for each table is needed.
    #$Headers = @("IP Address", "Hostname", "HW Address", "Device Type")
    # Iterate over each <tr> in this table body
    $_.getElementsByTagName("tr") | ForEach-Object {
        # Grab the InnerText of every <td>. The @() wrapper keeps the result an array even
        # for a single cell; a bare string would be indexed character by character below.
        $OutputRow = @($_.getElementsByTagName("td") | Select-Object -ExpandProperty InnerText)
        if ($OutputRow.Count -eq 0) {
            # A row without <td> cells carries no data. If headers are still unknown,
            # use its <th> cells (if any) as headers instead of letting the next
            # data row be mistaken for the header row.
            if (-not $Headers) {
                $Headers = @($_.getElementsByTagName("th") | Select-Object -ExpandProperty InnerText)
            }
            return   # inside ForEach-Object this moves on to the next row
        }
        # If $Headers is not defined yet, this must be the first row and must contain the headers.
        # Otherwise build an ordered hash from the row and turn it into an object.
        # These objects can be piped to many cmdlets: Out-GridView, ConvertTo-Csv, Format-Table, etc.
        if ($Headers) {
            $OutputHash = [ordered]@{}
            for ($i = 0; $i -lt $OutputRow.Count; $i++) {
                # A row may have more cells than there are headers; a $null key would throw,
                # so surplus cells get a generated positional name.
                if ($i -lt $Headers.Count -and $null -ne $Headers[$i]) {
                    $Name = $Headers[$i]
                } else {
                    $Name = "Column$($i + 1)"
                }
                $OutputHash[$Name] = $OutputRow[$i]
            }
            New-Object psobject -Property $OutputHash
        } else {
            $Headers = $OutputRow
        }
    }
}
# Sample output for the simpletable.html page:
# IP Address : 10.0.0.5
# Hostname : pokey
# FQDN : pokey.example.tld
# HW Address : 012345679abcdef
# Device Type : Static-Your-Momma
<!doctype html>
<html>
<head>
<title>Simple Table</title>
</head>
<body>
<h1>Simple Table</h1>
To parse for data
<!-- Sample input for the table-parsing scripts (presumably the page served as simpletable.html; confirm). -->
<!-- One table, no th cells: the first row of td cells holds the column names, the second row is data. -->
<table>
<tr>
<td>IP Address</td>
<td>Hostname</td>
<td>FQDN</td>
<td>HW Address</td>
<td>Device Type</td>
</tr>
<tr>
<td>10.0.0.5</td>
<td>pokey</td>
<td>pokey.example.tld</td>
<td>012345679abcdef</td>
<td>Static-Your-Momma</td>
</tr>
</table>
</body>
</html>
# Fetch a web page and turn each HTML table on it into PowerShell objects,
# one object per data row, with property names taken from the header cells.
#
# This variant loads the page as XML and queries it with XPath, so it does not
# need the ParsedHtml DOM. The page must be well-formed XML once the doctype
# line is removed; otherwise the [xml] cast throws.
$Uri = "http://midnightfreddie.com/reddit/simpletable.html"
$InfoPage = Invoke-Webrequest -Uri $Uri
# Filter out the doctype line, then cast the remaining lines to an XML document
$Xml = [xml]($InfoPage.Content.Split("`n") | Where-Object { -not ($_ -imatch '<!doctype') })
# $Xml = [xml](Get-Content .\simpletable.html | Where-Object { -not ($_ -imatch '<!doctype') } | Out-String)
# Iterate over each <table>
$Xml.SelectNodes("//table") | ForEach-Object {
    # Collect the <th> texts as headers. This is empty when the table has no <th> cells,
    # in which case the first row of <td> cells becomes the header row below.
    # Rows are looked up both directly under <table> and inside <thead>/<tbody>, because an
    # XML parser does not insert an implicit <tbody> the way an HTML DOM does.
    # The @() wrapper keeps a single header an array instead of a bare string.
    $Headers = @($_.SelectNodes("tr/th | thead/tr/th | tbody/tr/th") | Select-Object -ExpandProperty InnerText)
    # Iterate over each <tr> of this table, in document order
    $_.SelectNodes("tr | thead/tr | tbody/tr | tfoot/tr") | ForEach-Object {
        # Grab the InnerText of every <td>; @() keeps a single cell an array so that
        # indexing below returns the cell rather than one character of it.
        $OutputRow = @($_.SelectNodes("td") | Select-Object -ExpandProperty InnerText)
        if ($OutputRow.Count -eq 0) {
            # Rows without <td> cells (for example a <th> header row) carry no data;
            # skip them instead of emitting an empty object.
            return   # inside ForEach-Object this moves on to the next row
        }
        # If $Headers is still empty, this must be the first row and must contain the headers.
        # Otherwise build an ordered hash from the row and turn it into an object.
        # These objects can be piped to many cmdlets: Out-GridView, ConvertTo-Csv, Format-Table, etc.
        if ($Headers) {
            $OutputHash = [ordered]@{}
            for ($i = 0; $i -lt $OutputRow.Count; $i++) {
                # A row may have more cells than there are headers; a $null key would throw,
                # so surplus cells get a generated positional name.
                if ($i -lt $Headers.Count -and $null -ne $Headers[$i]) {
                    $Name = $Headers[$i]
                } else {
                    $Name = "Column$($i + 1)"
                }
                $OutputHash[$Name] = $OutputRow[$i]
            }
            New-Object psobject -Property $OutputHash
        } else {
            $Headers = $OutputRow
        }
    }
}
# Sample output for the simpletable.html page:
# IP Address : 10.0.0.5
# Hostname : pokey
# FQDN : pokey.example.tld
# HW Address : 012345679abcdef
# Device Type : Static-Your-Momma
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment