Skip to content

Instantly share code, notes, and snippets.

@davepermen
Last active February 8, 2024 00:07
Show Gist options
  • Save davepermen/e906fb4e7ae30461bb3f6f80b4789df5 to your computer and use it in GitHub Desktop.
Save davepermen/e906fb4e7ae30461bb3f6f80b4789df5 to your computer and use it in GitHub Desktop.
/// <summary>
/// Fluent extension methods for pushing raw bytes and little-endian unsigned
/// integers into a <see cref="Stream"/>. Every method returns the stream it
/// was called on so calls can be chained.
/// </summary>
public static class ByteHelpers
{
    /// <summary>Writes the given bytes to the stream unchanged.</summary>
    /// <param name="stream">Destination stream.</param>
    /// <param name="bytes">Bytes to write, in order.</param>
    /// <returns>The same <paramref name="stream"/> instance.</returns>
    public static Stream WriteBytes(this Stream stream, params byte[] bytes)
    {
        stream.Write(bytes);
        return stream;
    }

    /// <summary>
    /// Same as <see cref="WriteBytes"/>; the name lines up with
    /// <see cref="Write16"/>, <see cref="Write32"/> and <see cref="Write64"/>.
    /// </summary>
    /// <param name="stream">Destination stream.</param>
    /// <param name="bytes">Bytes to write, in order.</param>
    /// <returns>The same <paramref name="stream"/> instance.</returns>
    public static Stream Write_8(this Stream stream, params byte[] bytes) => stream.WriteBytes(bytes);

    /// <summary>Writes each value as 2 bytes, least significant byte first.</summary>
    /// <param name="stream">Destination stream.</param>
    /// <param name="values">Values to write, in order.</param>
    /// <returns>The same <paramref name="stream"/> instance.</returns>
    public static Stream Write16(this Stream stream, params UInt16[] values)
    {
        for (int i = 0; i < values.Length; i++)
        {
            EmitLowByteFirst(stream, values[i], sizeof(UInt16));
        }

        return stream;
    }

    /// <summary>Writes each value as 4 bytes, least significant byte first.</summary>
    /// <param name="stream">Destination stream.</param>
    /// <param name="values">Values to write, in order.</param>
    /// <returns>The same <paramref name="stream"/> instance.</returns>
    public static Stream Write32(this Stream stream, params UInt32[] values)
    {
        for (int i = 0; i < values.Length; i++)
        {
            EmitLowByteFirst(stream, values[i], sizeof(UInt32));
        }

        return stream;
    }

    /// <summary>Writes each value as 8 bytes, least significant byte first.</summary>
    /// <param name="stream">Destination stream.</param>
    /// <param name="values">Values to write, in order.</param>
    /// <returns>The same <paramref name="stream"/> instance.</returns>
    public static Stream Write64(this Stream stream, params UInt64[] values)
    {
        for (int i = 0; i < values.Length; i++)
        {
            EmitLowByteFirst(stream, values[i], sizeof(UInt64));
        }

        return stream;
    }

    // Emits the lowest `byteCount` bytes of `value` one WriteByte call at a time,
    // starting with bits 0-7 and moving up 8 bits per step.
    private static void EmitLowByteFirst(Stream target, ulong value, int byteCount)
    {
        int totalBits = byteCount * 8;
        for (int shift = 0; shift < totalBits; shift += 8)
        {
            target.WriteByte((byte)(value >> shift));
        }
    }
}
using System.Text;
namespace UncompressedZipWriter;
/// <summary>
/// One file that is being written into the archive: its entry name, the
/// stream its content is read from, its size in bytes and its last-modified
/// timestamp, plus the values filled in while the entry is written
/// (<see cref="Offset"/> and <see cref="CrcBits"/>).
/// </summary>
record FileInZip(string Name, Stream Stream, long Size, DateTime LastModified)
{
    // The packed date stores the year as 7 bits counted from 1980, so only
    // 1980..2107 can be represented.
    const int FirstDosYear = 1980;
    const int LastDosYear = FirstDosYear + 127;

    /// <summary>Byte offset of this entry's local header; set when the entry is written.</summary>
    public ulong Offset { get; set; } = 0;

    /// <summary>
    /// Packed 16-bit time: seconds/2 in bits 0-4, minutes in bits 5-10, hours in bits 11-15.
    /// </summary>
    public ushort TimeBits
    {
        get
        {
            var t = DosTimestamp;
            return (ushort)((t.Second / 2) | t.Minute << 5 | t.Hour << 11);
        }
    }

    /// <summary>
    /// Packed 16-bit date: day in bits 0-4, month in bits 5-8, (year - 1980) in bits 9-15.
    /// </summary>
    public ushort DateBits
    {
        get
        {
            var d = DosTimestamp;
            return (ushort)(d.Day | d.Month << 5 | (d.Year - FirstDosYear) << 9);
        }
    }

    /// <summary>CRC-32 of the file content; set once the content has been streamed.</summary>
    public uint CrcBits { get; set; } = 0;

    /// <summary>The entry name encoded as UTF-8 (re-encoded on every access).</summary>
    public byte[] NameAsBytes => Encoding.UTF8.GetBytes(Name);

    // Timestamp actually encoded by TimeBits/DateBits. A year outside 1980..2107
    // would make the year field negative or wider than 7 bits, and the ushort
    // cast would then silently produce an unrelated date. Such timestamps fall
    // back to 1980-01-01 00:00:00, the earliest representable value.
    // Declared as a private computed property (no backing field) so record
    // equality and ToString() are unaffected.
    private DateTime DosTimestamp =>
        LastModified.Year is >= FirstDosYear and <= LastDosYear
            ? LastModified
            : new DateTime(FirstDosYear, 1, 1, 0, 0, 0);
}
/// <summary>
/// Entry name and content size of a file, without an open stream; this is
/// what <c>Zip64.FilesToZipSize</c> builds for each input when it predicts
/// the archive size.
/// </summary>
record FileInZipSize(string Name, long Size)
{
    /// <summary>The entry name encoded as UTF-8 (re-encoded on every access).</summary>
    public byte[] NameAsBytes
    {
        get { return Encoding.UTF8.GetBytes(Name); }
    }
}
using Humanizer;
using System.Diagnostics;
using UncompressedZipWriter;
// Manual smoke-test driver: builds several archives from hard-coded local
// paths and lets TestZip report size and duration for each.
Console.WriteLine("Let's Zip This!\n");

// Case 1: every file in the OneDrive "Downloads" folder.
string[] oneDriveDownloads = Directory.GetFiles(@"C:\Users\spoda\OneDrive - Coop Genossenschaft\Downloads");
TestZip(oneDriveDownloads, @"C:\Users\spoda\Downloads\Downloads.test.zip");

// Case 2: a single file (Big4_0GB.bin - presumably about 4 GB, going by its name).
string[] singleBigFile =
[
    @"C:\Users\spoda\Downloads\Files\Big4_0GB.bin"
];
TestZip(singleBigFile, @"C:\Users\spoda\Downloads\Big4_0GB.test.zip");

// Case 3: two of the Big4_* files in one archive.
string[] twoBigFiles =
[
    @"C:\Users\spoda\Downloads\Files\Big4_0GB.bin",
    @"C:\Users\spoda\Downloads\Files\Big4_1GB.bin"
];
TestZip(twoBigFiles, @"C:\Users\spoda\Downloads\Big4_0_1GB.test.zip");

// Case 4: the five Big1_* files in one archive.
string[] fiveBigFiles =
[
    @"C:\Users\spoda\Downloads\Files\Big1_0GB.bin",
    @"C:\Users\spoda\Downloads\Files\Big1_1GB.bin",
    @"C:\Users\spoda\Downloads\Files\Big1_2GB.bin",
    @"C:\Users\spoda\Downloads\Files\Big1_3GB.bin",
    @"C:\Users\spoda\Downloads\Files\Big1_4GB.bin"
];
TestZip(fiveBigFiles, @"C:\Users\spoda\Downloads\Big1_0-4GB.test.zip");

// Case 5: the OneDrive folder (listed again at this point) followed by the five Big1_* files.
IEnumerable<string> everything = Directory
    .GetFiles(@"C:\Users\spoda\OneDrive - Coop Genossenschaft\Downloads")
    .Concat(fiveBigFiles);
TestZip(everything, @"C:\Users\spoda\Downloads\All.test.zip");
// Zips the given files into `file`, then prints the archive's file name, the
// predicted size next to the size actually written (and their difference,
// expected to be 0), and the elapsed time.
//   filelist - paths of the files to put into the archive
//   file     - path of the archive to create; an existing file is deleted first
void TestZip(IEnumerable<string> filelist, string file)
{
    // Zip64 takes (entry name, source path) pairs rather than bare paths, so
    // each path is paired with its file name as the entry name. Materialised
    // once because the list is consumed by both the writer and the size
    // prediction.
    (string name, string path)[] zipEntries = filelist
        .Select(sourcePath => (name: Path.GetFileName(sourcePath), path: sourcePath))
        .ToArray();

    var timer = Stopwatch.StartNew();
    Console.WriteLine(Path.GetFileName(file));
    File.Delete(file);
    using (var stream = File.OpenWrite(file))
    {
        stream.FilesToZip(zipEntries);
    }

    long predictedSize = Zip64.FilesToZipSize(zipEntries);
    long actualSize = new FileInfo(file).Length;
    Console.WriteLine($"Size: {predictedSize} - {actualSize} = {predictedSize - actualSize}");
    timer.Stop();
    Console.WriteLine($"Duration: {timer.Elapsed.Humanize(precision: 2)}");
    Console.WriteLine();
}
/// https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
/// https://rzymek.github.io/post/excel-zip64/
namespace UncompressedZipWriter;
/// <summary>
/// Writes an uncompressed ("stored") ZIP64 archive to a stream in a single
/// forward pass, and predicts the exact size such an archive will have.
/// Sizes and CRC of each entry are written after its data (data descriptor),
/// so the output stream never has to seek.
/// </summary>
public static class Zip64
{
    const ushort bitflags = 0b0000_1000_0000_1000; // (bit 3 for Data Descriptor at End, bit 11 for UTF-8)

    // Fixed byte counts of the records emitted by the Write* methods below.
    // FilesToZipSize relies on them matching what the writer actually emits.
    const int LocalEntryFixedSize = 50;    // local header (30) + data descriptor (20), without name and data
    const int CentralEntryFixedSize = 66;  // central header (46) + ZIP64 extra with both sizes (20), without name
    const int Zip64OffsetSize = 8;         // additional 8-byte offset in the ZIP64 extra of far-away entries
    const int ArchiveTrailerSize = 98;     // ZIP64 end record (56) + locator (20) + classic end record (22)

    // Value stored in a 32-bit field to say "look in the ZIP64 extra field instead".
    const uint Zip64Sentinel = 0xFFFFFFFF;

    /// <summary>
    /// Computes the size in bytes of the archive that <see cref="FilesToZip"/>
    /// writes for the same list, without reading any file content.
    /// </summary>
    /// <param name="filesToZipSize">Entry name and source path of every file, in archive order.</param>
    /// <returns>The predicted archive size in bytes.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="filesToZipSize"/> is null.</exception>
    public static long FilesToZipSize(IEnumerable<(string name, string path)> filesToZipSize)
    {
        ArgumentNullException.ThrowIfNull(filesToZipSize);

        long total = ArchiveTrailerSize;
        ulong offset = 0;

        // Single pass: every file is stat'ed and its name encoded exactly once.
        foreach (var (name, path) in filesToZipSize)
        {
            var file = new FileInZipSize(Name: name, Size: new FileInfo(path).Length);
            int nameLength = file.NameAsBytes.Length;

            // Same condition the central directory writer uses to add the 8-byte offset.
            if (offset >= Zip64Sentinel)
            {
                total += Zip64OffsetSize;
            }

            total += LocalEntryFixedSize + nameLength + file.Size + CentralEntryFixedSize + nameLength;
            offset += (ulong)(LocalEntryFixedSize + nameLength + file.Size);
        }

        return total;
    }

    /// <summary>
    /// Writes all files as stored entries, followed by the central directory
    /// and the end-of-central-directory records. Offsets are counted from 0,
    /// i.e. the archive is assumed to begin at the stream's current position.
    /// </summary>
    /// <param name="zip">Writable output stream.</param>
    /// <param name="filesToZip">Entry name and source path of every file, in archive order.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException">An entry name is longer than 65535 bytes in UTF-8.</exception>
    public static void FilesToZip(this Stream zip, IEnumerable<(string name, string path)> filesToZip)
    {
        ArgumentNullException.ThrowIfNull(zip);
        ArgumentNullException.ThrowIfNull(filesToZip);

        var written = new List<FileInZip>();
        ulong position = 0;

        // [local file entries]
        // Each source file is opened only while it is being copied and is
        // disposed even when writing fails, so no handle leaks and at most
        // one input file is open at any time.
        foreach (var (name, path) in filesToZip)
        {
            var info = new FileInfo(path);
            using var input = File.OpenRead(path);
            var entry = new FileInZip(Name: name, Stream: input, Size: info.Length, LastModified: info.LastWriteTime);
            position += zip.WriteFileEntry(entry, position);
            written.Add(entry);
        }

        // [central directory]
        var start = position;
        ulong length = 0;
        foreach (var entry in written)
        {
            length += zip.WriteCentralDirectoryEntry(entry);
        }

        zip.WriteEndOfCentralDirectory(count: (ulong)written.Count, offset: start, length: length);
    }

    /// <summary>
    /// Writes local file header, file content and data descriptor for one file
    /// and records the entry's offset and CRC on <paramref name="file"/>.
    /// </summary>
    /// <returns>Number of bytes the entry occupies: 50 + name length + file size.</returns>
    static ulong WriteFileEntry(this Stream zip, FileInZip file, ulong offset)
    {
        var name = file.NameAsBytes;
        if (name.Length > ushort.MaxValue)
        {
            // The header stores the name length in 16 bits; a plain cast would
            // silently truncate it and corrupt the archive.
            throw new ArgumentException(
                $"Entry name '{file.Name}' is {name.Length} bytes in UTF-8; at most {ushort.MaxValue} bytes fit into a ZIP header.",
                nameof(file));
        }

        file.Offset = offset;

        // header: 4 + 2*5 + 4*3 + 2*2 = 30, descriptor: 4 + 8*2 = 20  =>  50 + name + data
        zip
            .Write_8(0x50, 0x4b, 0x03, 0x04)                        // signature [local file header]
            .Write16(45, bitflags, 0, file.TimeBits, file.DateBits) // version (45 = ZIP64) | general purpose bitflag | compression method (0 = store) | time | date
            .Write32(0, 0, 0)                                       // CRC | compressed size | uncompressed size => 0 each, real values follow in the data descriptor
            .Write16((ushort)name.Length, 0)                        // filename length | extrafield length
            .Write_8(name)                                          // filename
            .WriteStreamAndComputeCrc(file.Stream, crc => file.CrcBits = crc) // file content; the callback stores the CRC before the next argument is evaluated
            .Write32(file.CrcBits)                                  // data descriptor: CRC
            .Write64((ulong)file.Size, (ulong)file.Size)            // data descriptor: compressed size | uncompressed size (8 bytes each)
            ;

        return (ulong)(LocalEntryFixedSize + name.Length + file.Size);
    }

    /// <summary>
    /// Writes the central directory header of one file. Both sizes always go
    /// into the ZIP64 extra field; the local-header offset is added to it only
    /// when it does not fit into 32 bits.
    /// </summary>
    /// <returns>Number of bytes written: 66 + name length, plus 8 when the offset is in the extra field.</returns>
    static ulong WriteCentralDirectoryEntry(this Stream zip, FileInZip file)
    {
        // 4 + 2*6 + 4*3 + 2*5 + 4*2 = 46, extra: 2 + 2 + 8*2 = 20  =>  66 + name (+ 8 for a ZIP64 offset)
        var name = file.NameAsBytes;
        var zip64offset = file.Offset >= Zip64Sentinel;

        zip
            .Write_8(0x50, 0x4b, 0x01, 0x02)                                 // signature [central directory header]
            .Write16(45, 45, bitflags, 0, file.TimeBits, file.DateBits)      // version made by (ZIP64) | min version to extract (ZIP64) | general purpose bitflag | compression method (0 = store) | time | date
            .Write32(file.CrcBits, Zip64Sentinel, Zip64Sentinel)             // CRC | compressed size | uncompressed size => FFFFFFFF, real sizes are in the ZIP64 extra
            .Write16((ushort)name.Length)                                    // filename length
            .Write16(zip64offset ? (ushort)28 : (ushort)20, 0, 0, 0)         // extrafield length | file comment length | disk number | internal file attributes
            .Write32(0, zip64offset ? Zip64Sentinel : (uint)file.Offset)     // external file attributes | offset of local header
            .Write_8(name)                                                   // filename
            .Write_8(0x01, 0x00)                                             // extrafield header id (0x0001 = ZIP64)
            .Write16(zip64offset ? (ushort)24 : (ushort)16)                  // size of the extrafield data below
            .Write64((ulong)file.Size, (ulong)file.Size)                     // uncompressed and compressed size (identical because the data is stored)
            ;

        if (zip64offset)
        {
            zip.Write64(file.Offset);
        }

        return (ulong)(CentralEntryFixedSize + name.Length + (zip64offset ? Zip64OffsetSize : 0));
    }

    /// <summary>
    /// Writes the ZIP64 end-of-central-directory record, its locator and the
    /// classic end-of-central-directory record (98 bytes in total).
    /// </summary>
    /// <param name="zip">Output stream.</param>
    /// <param name="count">Number of entries in the central directory.</param>
    /// <param name="offset">Offset at which the central directory starts.</param>
    /// <param name="length">Size of the central directory in bytes.</param>
    /// <returns>The same <paramref name="zip"/> stream.</returns>
    static Stream WriteEndOfCentralDirectory(this Stream zip, ulong count, ulong offset, ulong length)
    {
        //   4 + 8 + 2*2 + 4*2 + 8*4   = 56  (zip64 end of central directory record)
        // + 4 + 4 + 8 + 4             = 20  (zip64 end of central directory locator)
        // + 4 + 2*4 + 4 + 4 + 2       = 22  (end of central directory record)
        // = 98
        return zip
            .Write_8(0x50, 0x4b, 0x06, 0x06)       // signature [zip64 end of central directory record]
            .Write64(44)                           // size of the rest of this record: 56 bytes minus the 12 bytes of signature and this field
            .Write16(45, 45)                       // version made by (ZIP64) | min version to extract (ZIP64)
            .Write32(0, 0)                         // number of this disk | number of the disk with the start of the central directory
            .Write64(count, count, length, offset) // entries on this disk | entries in total | size of central directory | offset of start of central directory
            .Write_8(0x50, 0x4b, 0x06, 0x07)       // signature [zip64 end of central directory locator]
            .Write32(0)                            // number of the disk with the start of the zip64 end of central directory
            .Write64(offset + length)              // offset of the zip64 end of central directory record (it directly follows the central directory)
            .Write32(1)                            // total number of disks
            .Write_8(0x50, 0x4b, 0x05, 0x06)       // signature [end of central directory record]
            .Write16(0, 0, 0xFFFF, 0xFFFF)         // disk number | starting disk | entries on this disk | entries in total => FFFF, see zip64 record
            .Write32(0xFFFFFFFF)                   // central directory size   => FFFFFFFF, see zip64 record
            .Write32(0xFFFFFFFF)                   // central directory offset => FFFFFFFF, see zip64 record
            .Write16(0)                            // archive comment length
            ;
    }

    /// <summary>
    /// Copies <paramref name="input"/> to <paramref name="output"/> in 1 MiB
    /// chunks until a read returns 0 bytes, computing the CRC-32 of everything
    /// copied with Force.Crc32.
    /// </summary>
    /// <param name="output">Stream the data is written to.</param>
    /// <param name="input">Stream the data is read from.</param>
    /// <param name="calculatedCrc">Invoked once, after the copy, with the final CRC.</param>
    /// <returns>The same <paramref name="output"/> stream.</returns>
    static Stream WriteStreamAndComputeCrc(this Stream output, Stream input, Action<uint> calculatedCrc)
    {
        byte[] buff = new byte[1024 * 1024];

        // The first chunk starts the checksum, later chunks continue it.
        int len = input.Read(buff, 0, buff.Length);
        uint crc = Force.Crc32.Crc32Algorithm.Compute(buff, 0, len);
        output.Write(buff, 0, len);

        while ((len = input.Read(buff, 0, buff.Length)) > 0)
        {
            crc = Force.Crc32.Crc32Algorithm.Append(crc, buff, 0, len);
            output.Write(buff, 0, len);
        }

        calculatedCrc(crc);
        return output;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment