Skip to content

Instantly share code, notes, and snippets.

@santisq
Last active September 7, 2022 15:47
Show Gist options
  • Save santisq/c64891312f0c5ca1393ea2a07544fa72 to your computer and use it in GitHub Desktop.
Save santisq/c64891312f0c5ca1393ea2a07544fa72 to your computer and use it in GitHub Desktop.
Simple PowerShell function that splits a CSV file either into a given number of parts or into parts of a given size, using `StreamReader` and `StreamWriter`.
function Split-Csv {
    <#
    .SYNOPSIS
        Splits a CSV file into several smaller files, repeating the header line in each one.

    .DESCRIPTION
        Reads the source file line by line with a StreamReader and copies the lines into
        numbered output files named "<BaseName> - Part <n><Extension>" (numbering starts
        at 0) inside the destination folder. The first line of the source is treated as
        the header and is written at the top of every part.

        A new part is started once the current part's stream length has reached the
        computed threshold. The check runs before each line is written, so a part may
        exceed the threshold by up to one line. The input is split on physical line
        breaks only; a quoted field that contains a line break gets no special handling.

    .PARAMETER Path
        Path of the CSV file to split. Also binds from a pipeline object's FullName property.

    .PARAMETER DestinationFolder
        Folder that receives the parts. It is created when it does not exist.

    .PARAMETER BySize
        Approximate size threshold in bytes for each part (default 1kb). Must be at least 1.

    .PARAMETER ByParts
        Approximate number of parts to produce (default 3). Must be at least 1.

    .EXAMPLE
        Split-Csv -Path .\data.csv -DestinationFolder .\out -ByParts 5

    .EXAMPLE
        Get-Item .\data.csv | Split-Csv -DestinationFolder .\out -BySize 10mb
    #>
    [CmdletBinding(DefaultParameterSetName = 'ByParts')]
    param(
        [Parameter(Mandatory, ValueFromPipelineByPropertyName)]
        [Alias('FullName')]
        [string] $Path,

        [Parameter(Mandatory)]
        [string] $DestinationFolder,

        [Parameter(ParameterSetName = 'BySize')]
        [ValidateRange(1, [int]::MaxValue)]
        [int] $BySize = 1kb,

        [Parameter(ParameterSetName = 'ByParts')]
        [ValidateRange(1, [int]::MaxValue)]
        [int] $ByParts = 3
    )

    process {
        # Reset the handles for every pipeline item so the finally block never touches a
        # stream left over from a previous item or a same-named variable in a parent scope.
        $reader = $null
        $writer = $null

        try {
            # Keep the resolved file in its own local instead of re-typing the [string] parameter.
            $source = [IO.FileInfo] $PSCmdlet.GetUnresolvedProviderPathFromPSPath($Path)
            $destination = $PSCmdlet.GetUnresolvedProviderPathFromPSPath($DestinationFolder)

            # CreateDirectory does nothing when the folder already exists and treats the path
            # literally, so names containing wildcard characters such as [ ] are handled.
            $null = [IO.Directory]::CreateDirectory($destination)

            $reader = [IO.StreamReader] $source.FullName
            $headers = $reader.ReadLine()
            $Index = 0

            # The threshold is compared with the part's stream length, which is in bytes and
            # already includes the header line; $headers.Length counts characters, so the
            # resulting sizes are approximate.
            if ($PSCmdlet.ParameterSetName -eq 'ByParts') {
                $ChunkSize = [math]::Ceiling($source.Length / $ByParts) + $headers.Length
            }
            else {
                $ChunkSize = $BySize - $headers.Length
            }

            # Opens the part for the current $Index, writes the header and returns the writer.
            # Invoked with '&', so it reads $Index, $source, $destination and $headers from
            # this scope at call time.
            $newWriter = {
                $name = $source.BaseName + " - Part $Index" + $source.Extension
                $newChunk = Join-Path $destination -ChildPath $name
                $partWriter = [IO.StreamWriter] $newChunk
                # AutoFlush keeps BaseStream.Length in step with what has been written.
                $partWriter.AutoFlush = $true
                $partWriter.WriteLine($headers)
                $partWriter
            }

            $writer = & $newWriter
            while (-not $reader.EndOfStream) {
                # Roll over to the next part before writing, once the threshold is reached.
                if ($writer.BaseStream.Length -ge $ChunkSize) {
                    $Index++
                    $writer.Dispose()
                    $writer = & $newWriter
                }
                $writer.WriteLine($reader.ReadLine())
            }
        }
        catch {
            $PSCmdlet.ThrowTerminatingError($_)
        }
        finally {
            # Dispose only what was actually opened; nothing is emitted to the pipeline.
            foreach ($stream in $writer, $reader) {
                if ($null -ne $stream) {
                    $stream.Dispose()
                }
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment