Skip to content

Instantly share code, notes, and snippets.

@DBremen
Created August 27, 2015 13:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DBremen/67a2b18e59f440184f47 to your computer and use it in GitHub Desktop.
Save DBremen/67a2b18e59f440184f47 to your computer and use it in GitHub Desktop.
Clean up and Cross-Join excel table containing cells with multiple entries separated by comman or linebreaks
function CartesianProduct($htRow, $currCol=0){
$colCount = $htRow.Keys.Count
if ($currCol -eq 0){
$wordIndices = New-Object int[] $colCount
}
$wordCount = ($htRow.Values | select)[$currCol].Count
#walk through the items in the current
for ($wordIndex = 0; $wordIndex -lt $wordCount; $wordIndex++){
#add the index to the indices for the current column
$wordIndices[$currCol] = $wordIndex
#if we reach the end of the row
if ($currCol -eq ($colCount - 1)) {
$htCartesianSet = [ordered]@{}
for ($colIndex = 0; $colIndex -lt $colCount; $colIndex++){
#add the items to the result set based on the collected indices
$key = ($htRow.Keys | select)[$colIndex]
$value = ($htRow.Values | select)[$colIndex][$wordIndices[$colIndex]]
$htCartesianSet.Add($key, $value)
}
[PSCustomObject]$htCartesianSet
}
#do this for every column
else {
CartesianProduct $htRow ($currCol + 1)
}
}
}
function Remove-ComObject {
end {
Start-Sleep -Milliseconds 500
[Management.Automation.ScopedItemOptions]$scopedOpt = 'ReadOnly, Constant'
Get-Variable -Scope 1 | Where-Object {
$_.Value.PSTypeNames -contains 'System.__ComObject' -and -not ($scopedOpt -band $_.Options)
} | Remove-Variable -Scope 1 -Verbose:([Bool]$PSBoundParameters['Verbose'].IsPresent)
[GC]::Collect()
}
}
function ImportAndCrossJoin($path){
$xls = New-Object -ComObject Excel.Application
$xls.Visible = $false
$wb = $xls.Workbooks.Open($path)
$ws = $wb.Sheets.Item(1)
$lastRow = ($ws.UsedRange.Rows).Count
$lastCol = ($ws.UsedRange.Columns).Count
foreach ($row in (2..$lastRow)){
$ht = [ordered]@{}
foreach ($col in (1..$lastCol)){
$heading = $ws.Cells.Item(1,$col).value2
#split the cell by crlf or comma
$parts = ($ws.Cells.Item($row,$col).value2 -split ",|`n").Trim()
$ht."$heading" = @($parts)
}
CartesianProduct $ht
}
$wb.Close()
$xls.Quit()
}
ImportAndCrossJoin C:\test.xlsx | Export-CSV -NoTypeInformation -path C:\test2.csv
ii C:\test2.csv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment