Skip to content

Instantly share code, notes, and snippets.

@adamfisher
Last active December 31, 2018 16:16
Show Gist options
  • Save adamfisher/e4a9a6b5a67d2401703c945ccb8035f4 to your computer and use it in GitHub Desktop.
Save adamfisher/e4a9a6b5a67d2401703c945ccb8035f4 to your computer and use it in GitHub Desktop.
Extension method for FileInfo to split a big file into chunks.
/// <summary>
/// Splits a file into multiple files based on the specified chunk size of each file.
/// Each chunk is written to <paramref name="targetPath"/> and named
/// <c>{original file name}.CHUNK_{n}</c>, where <c>n</c> starts at 1.
/// </summary>
/// <param name="file">The file to split.</param>
/// <param name="chunkSize">The maximum number of bytes to store in each file.
/// If a chunk size is not provided, files will be split into 1 MB (1,000,000 byte) chunks by default.
/// When <paramref name="breakOnNewlines"/> is <c>true</c> a chunk may exceed this size,
/// because it is extended up to and including the next newline byte.</param>
/// <param name="targetPath">The destination where the split files will be saved.
/// Defaults to the directory that contains <paramref name="file"/>.</param>
/// <param name="deleteAfterSplit">if set to <c>true</c>, the original file is deleted after creating the newly split files.</param>
/// <param name="breakOnNewlines">if set to <c>true</c> break the file on the next newline (<c>\n</c>) once the chunk size limit is reached.</param>
/// <returns>
/// An array of references to the split files, in order. If the file already fits in a
/// single chunk, no files are created, nothing is deleted, and the array contains only
/// <paramref name="file"/> itself.
/// </returns>
/// <exception cref="ArgumentNullException">file</exception>
/// <exception cref="ArgumentOutOfRangeException">chunkSize - The chunk size must be larger than 0 bytes.</exception>
public static FileInfo[] SplitOnChunkSize(
    this FileInfo file,
    int chunkSize = 1000000,
    DirectoryInfo targetPath = null,
    bool deleteAfterSplit = false,
    bool breakOnNewlines = true
)
{
    if (file == null)
    {
        throw new ArgumentNullException(nameof(file));
    }

    if (chunkSize < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize,
            "The chunk size must be larger than 0 bytes.");
    }

    // Nothing to split: hand back the original file untouched.
    if (file.Length <= chunkSize)
    {
        return new[] {file};
    }

    const byte NewLine = (byte)'\n';

    // The capacity is only a hint, so clamp it: a huge file with a tiny chunk size
    // must not overflow the int cast or pre-allocate an enormous list.
    const int MaxPresizedChunks = 1024;

    var buffer = new byte[chunkSize];
    targetPath = targetPath ?? file.Directory;
    var chunkedFiles = new List<FileInfo>(
        (int)Math.Min(file.Length / chunkSize + 1, MaxPresizedChunks));

    using (var input = file.OpenRead())
    {
        var index = 1;
        while (input.Position < input.Length)
        {
            var chunkFile = new FileInfo(
                Path.Combine(targetPath.FullName, $"{file.Name}.CHUNK_{index++}"));
            chunkedFiles.Add(chunkFile);

            using (var output = chunkFile.Create())
            {
                // Stream.Read may return fewer bytes than requested, so keep reading
                // until the buffer is full or the end of the input is reached.
                var chunkBytesRead = 0;
                while (chunkBytesRead < chunkSize)
                {
                    var bytesRead = input.Read(buffer, chunkBytesRead, chunkSize - chunkBytesRead);
                    if (bytesRead == 0)
                    {
                        break;
                    }

                    chunkBytesRead += bytesRead;
                }

                // Always persist the chunk, regardless of the newline option.
                output.Write(buffer, 0, chunkBytesRead);

                if (breakOnNewlines && chunkBytesRead > 0)
                {
                    // Look at the last byte actually read for this chunk (not the last
                    // slot of the buffer, which may hold data from a previous chunk),
                    // then extend the chunk up to and including the next newline.
                    var lastByte = buffer[chunkBytesRead - 1];
                    while (lastByte != NewLine)
                    {
                        var next = input.ReadByte();
                        if (next == -1)
                        {
                            break;
                        }

                        lastByte = (byte)next;
                        output.WriteByte(lastByte);
                    }
                }
            }
        }
    }

    if (deleteAfterSplit)
    {
        file.Delete();
    }

    return chunkedFiles.ToArray();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment