Created
October 1, 2021 17:25
-
-
Save vavalomi/1a7534c103ccc780ffee3a6ca021e478 to your computer and use it in GitHub Desktop.
Commandlet to split CSV file vertically in Powershell
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<# | |
.SYNOPSIS | |
Split CSV file vertically to have specified number of columns in each file | |
.DESCRIPTION | |
This is an example function how to split a wide file into multiple narrower files. | |
Potential use cases could be importing data from a csv file into an SQL database, | |
when number of columns in the CSV file is greater than the maximum table size. | |
.PARAMETER Path | |
Path to the input CSV file | |
.PARAMETER Output | |
Output file name with variable placeholder. | |
.PARAMETER IdColumns | |
List of columns to incude in every chunk. The list is assumed to be delimited with the same character as data | |
.PARAMETER Delimiter | |
What delimiter is used in the input file. The same will be used for the output. | |
.PARAMETER ChunkSize | |
How many columns (including ID columns) should be in each output. The last file may have less than that. | |
.EXAMPLE | |
PS> Split-CSV -Path "C:\Data\input_file.csv" -IdColumns "col1, col5" -Output "C:\Data\output_part{0}.csv" | |
.EXAMPLE | |
PS> Split-CSV -Path "~/data\input_file.csv" -IdColumns "proc_id;name" -Output "~/data\output_part{0}.csv" -Delimiter ";" -ChunkSize 1024 | |
#> | |
function Split-CSV{ | |
param( | |
$Path, | |
$IdColumns, | |
$Output, | |
$Delimiter = ",", | |
$ChunkSize = 10 | |
) | |
$HeaderLine = (Get-Content $Path -Head 1).Split($Delimiter) | |
if ($HeaderLine.length -le $ChunkSize) { | |
Write-Host "Number of columns is already less than $ChunkSize." | |
return | |
} | |
$IdIndices = $IdColumns.Split($Delimiter) | ForEach-Object { | |
[array]::indexof($HeaderLine, $_.Trim()) | |
} | |
$Headers = Split-Array $HeaderLine $IdIndices $ChunkSize $Delimiter | |
$OutputFiles = 1..($Headers.length) | ForEach-Object { | |
$file = $Output -f $_ | |
Remove-item -Path $file -Force -ErrorAction SilentlyContinue | |
$file | |
} | |
Get-Content $Path | ForEach-Object { | |
$row = Split-Array $_.Split($Delimiter) $IdIndices $ChunkSize $Delimiter | |
for ($i=0; $i -lt $row.length; $i++) { | |
Add-Content -Path $OutputFiles[$i] -Value $row[$i] | |
} | |
} | |
} | |
function Split-Array($Array, $Keep, $ChunkSize, $Delimiter) { | |
$Size = $Array.length | |
$l = 0 | |
$u = $l + $ChunkSize - 1 | |
$ret = @() | |
While ($l -lt $Size) { | |
$indices = $Keep + ($l..$u) | Sort-Object -Unique | |
While ($indices.length -gt $ChunkSize) { | |
$u = $u - 1 | |
$indices = $Keep + ($l..$u) | Sort-Object -Unique | |
} | |
$ret += ($Array[$indices] -join $Delimiter) | |
$l = $u + 1 | |
$u = $l + $ChunkSize - 1 | |
} | |
return $ret | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment