Last active
January 4, 2024 10:25
-
-
Save jacobwills/49ba628e3b49c6de9afa69512b974c83 to your computer and use it in GitHub Desktop.
Use PowerShell to extract content of HTML table tags and export to text file. Quick and dirty, but the idea is there.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extracts the first <table> of an HTML file and exports it twice:
#   * every column, as CSV, to a file next to the output file
#     (same base name, ".csv" extension);
#   * a tab-separated "Name<TAB>Width" line per row to the output file itself.
#
# Usage: <script>.ps1 <inputFile> <outputFile>
#   $args[0]  path of the HTML file to read
#   $args[1]  path of the text file to write
$inputFile = $args[0]
$outputFile = $args[1]

if (-not $inputFile -or -not $outputFile) {
    throw "Usage: <script> <inputFile> <outputFile>"
}

# ChangeExtension keeps whatever directory part $outputFile has. Concatenating
# Split-Path's result with "/" produced "/name.csv" (the root of the drive)
# whenever $outputFile was a bare file name.
$cvsOutputFile = [IO.Path]::ChangeExtension($outputFile, ".csv")

# Parse the document with the MSHTML COM object.
$html = New-Object -ComObject "HTMLFile"
$source = Get-Content -Path $inputFile -Raw -ErrorAction Stop
try {
    $html.IHTMLDocument2_write($source)
}
catch {
    # NOTE(review): IHTMLDocument2_write is reportedly missing on machines
    # without Office installed; the plain write() method taking UTF-16 bytes
    # is the commonly used fallback. Confirm on the target machines.
    $html.write([System.Text.Encoding]::Unicode.GetBytes($source))
}

$tables = @($html.getElementsByTagName("TABLE"))
if ($tables.Count -eq 0) {
    # Without this guard the loop below ran once on a $null row and emitted
    # a bogus empty record.
    throw "No <table> element found in '$inputFile'."
}
$table = $tables[0]

$titles = @()

# Collect the loop output directly; this avoids re-allocating the array on
# every row the way "$objArray +=" does.
$objArray = @(
    foreach ($row in @($table.Rows)) {
        $cells = @($row.Cells)
        if ($cells.Count -eq 0) { continue }

        # A row whose first cell is a <th> is treated as the header row.
        if ($cells[0].tagName -eq "TH") {
            $titles = @($cells | ForEach-Object { ("" + $_.InnerText).Trim() })
            continue
        }

        # No header seen yet: generate placeholder names P1, P2, ...
        # Count + 2 names are created, so up to two extra cells in a later,
        # wider row still map to a column.
        if (-not $titles) {
            $titles = @(1..($cells.Count + 2) | ForEach-Object { "P$_" })
        }

        $resultObject = [Ordered] @{ }
        for ($counter = 0; $counter -lt $cells.Count; $counter++) {
            $title = $titles[$counter]
            # Cells without a (non-empty) column title are dropped.
            if (-not $title) { continue }
            $resultObject[$title] = ("" + $cells[$counter].InnerText).Trim()
        }
        [PSCustomObject] $resultObject
    }
)

$objArray | Export-Csv -Path $cvsOutputFile -NoTypeInformation

# Start from a clean text file; as before, no file is left behind when the
# table produced no rows.
if (Test-Path -Path $outputFile -PathType Leaf) {
    Remove-Item -Path $outputFile
}

# Only the "Name" and "Width" columns go into the text output.
$layout = @($objArray | ForEach-Object { "$($_.Name)`t$($_.Width)" })
if ($layout.Count -gt 0) {
    # One write instead of re-opening the file with -Append for every row.
    $layout | Out-File -FilePath $outputFile -Encoding OEM
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment