Skip to content

Instantly share code, notes, and snippets.

@dkrusky
Created July 27, 2022 16:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dkrusky/f0d652410412670bd35a5f3a4126a76a to your computer and use it in GitHub Desktop.
Save dkrusky/f0d652410412670bd35a5f3a4126a76a to your computer and use it in GitHub Desktop.
Pure C# class that extracts files from a .tar.gz archive held in memory. Expects the archive's contents as a byte array.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using System.Text;
using System.Threading.Tasks;
/// <summary>
/// Extracts the entries of a gzip-compressed tar archive (<c>.tar.gz</c>) that is held
/// in memory and writes them below <see cref="SaveDirectory"/>.
/// </summary>
public class TarGzExtractor
{
    // Size of a tar header block; entry data is padded up to a multiple of this size.
    private const int BlockSize = 512;

    // Position and length of the header fields this class reads.
    private const int NameOffset = 0;
    private const int NameLength = 100;
    private const int SizeOffset = 124;
    private const int SizeLength = 12;
    private const int TypeFlagOffset = 156;

    /// <summary>
    /// Directory the entries are written to. When <c>null</c> or empty, paths are
    /// resolved against the current working directory.
    /// </summary>
    public string SaveDirectory { get; set; }

    /// <summary>
    /// When <c>true</c>, the directory structure stored in the archive is recreated below
    /// <see cref="SaveDirectory"/>; when <c>false</c>, every file is written directly into
    /// <see cref="SaveDirectory"/> using only its file name.
    /// </summary>
    public bool ExtractFullPath { get; set; }

    /// <summary>
    /// Case-insensitive substring an entry name must contain to be extracted.
    /// <c>null</c> or empty extracts every file.
    /// </summary>
    public string Filter { get; set; }

    /// <summary>
    /// Extract a <c>tar.gz</c> archive to the specified directory.
    /// </summary>
    /// <param name="gzip">The byte array of the <i>.tar.gz</i> to extract.</param>
    /// <returns>
    /// The paths of the files that were written, each built as
    /// <c>Path.Combine(SaveDirectory, entryName)</c>, in archive order.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="gzip"/> is <c>null</c>.</exception>
    /// <remarks>
    /// Extraction is best-effort: a malformed tar header or an I/O failure stops the
    /// extraction, is reported through <see cref="Trace"/>, and the files written so far
    /// are returned. Failures while decompressing the gzip data itself are not caught.
    /// </remarks>
    public async Task<List<string>> DecompressAsync(byte[] gzip)
    {
        if (gzip == null)
        {
            throw new ArgumentNullException("gzip");
        }

        List<string> extractedFiles = new List<string>();

        using (MemoryStream tar = new MemoryStream())
        {
            // Inflate the whole archive into memory so the tar data can be read by position.
            using (GZipStream gzipStream = new GZipStream(new MemoryStream(gzip), CompressionMode.Decompress))
            {
                await gzipStream.CopyToAsync(tar).ConfigureAwait(false);
            }

            tar.Seek(0, SeekOrigin.Begin);

            try
            {
                await ExtractEntriesAsync(tar, extractedFiles).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                // Keep the no-throw contract callers rely on, but make the failure visible.
                Trace.TraceError(
                    "TarGzExtractor: extraction stopped after {0} file(s): {1}",
                    extractedFiles.Count,
                    ex);
            }
        }

        return extractedFiles;
    }

    // Walks the tar headers in the stream and writes the matching entries to disk.
    private async Task ExtractEntriesAsync(MemoryStream tar, List<string> extractedFiles)
    {
        string saveDirectory = SaveDirectory ?? String.Empty;
        string filter = Filter ?? String.Empty;
        string rootFullPath = Path.GetFullPath(
            saveDirectory.Length == 0 ? Directory.GetCurrentDirectory() : saveDirectory);
        byte[] header = new byte[BlockSize];

        // A short read means the archive ended (or was truncated) before another header.
        while (tar.Read(header, 0, BlockSize) == BlockSize)
        {
            string rawName = ReadField(header, NameOffset, NameLength);
            string name = rawName
                .Replace('/', Path.DirectorySeparatorChar)
                .Trim(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);

            // An empty name marks the zero-filled blocks that terminate the archive.
            if (String.IsNullOrWhiteSpace(name))
            {
                break;
            }

            long size = ParseOctal(ReadField(header, SizeOffset, SizeLength));
            long dataStart = tar.Position;
            if (size < 0 || size > tar.Length - dataStart)
            {
                throw new InvalidDataException(
                    "Tar entry '" + rawName + "' declares more data than the archive contains.");
            }

            byte typeFlag = header[TypeFlagOffset];
            // '5' is the directory type flag; a trailing slash on the stored name is
            // also treated as a directory marker for headers that leave the flag unset.
            bool isDirectory = typeFlag == (byte)'5' || rawName.EndsWith("/", StringComparison.Ordinal);
            // '0' and NUL are regular files, '7' is a contiguous file stored like a regular one.
            bool isRegularFile = !isDirectory
                && (typeFlag == (byte)'0' || typeFlag == 0 || typeFlag == (byte)'7');

            if (isDirectory)
            {
                if (ExtractFullPath)
                {
                    string directoryPath = Path.GetFullPath(Path.Combine(saveDirectory, name));
                    if (IsInsideDirectory(rootFullPath, directoryPath))
                    {
                        Directory.CreateDirectory(directoryPath);
                    }
                    else
                    {
                        Trace.TraceWarning("TarGzExtractor: skipped directory '{0}' outside the save directory.", rawName);
                    }
                }
            }
            else if (isRegularFile)
            {
                string relativeName = ExtractFullPath ? name : Path.GetFileName(name);
                bool matchesFilter = filter.Length == 0
                    || relativeName.IndexOf(filter, StringComparison.OrdinalIgnoreCase) >= 0;

                if (matchesFilter)
                {
                    string targetPath = Path.Combine(saveDirectory, relativeName);
                    string fullTargetPath = Path.GetFullPath(targetPath);

                    if (IsInsideDirectory(rootFullPath, fullTargetPath))
                    {
                        Directory.CreateDirectory(Path.GetDirectoryName(fullTargetPath));

                        // FileMode.Create truncates an existing file so no stale bytes remain.
                        using (FileStream output = new FileStream(fullTargetPath, FileMode.Create, FileAccess.Write))
                        {
                            // Write straight from the decompressed buffer; size was validated above.
                            await output.WriteAsync(tar.GetBuffer(), (int)dataStart, (int)size).ConfigureAwait(false);
                        }

                        extractedFiles.Add(targetPath);
                    }
                    else
                    {
                        Trace.TraceWarning("TarGzExtractor: skipped file '{0}' outside the save directory.", rawName);
                    }
                }
            }
            // Entries of any other type (links, extended headers, ...) are skipped.

            // Entry data is padded to a whole number of blocks; jump to the next header.
            long paddedSize = (size + BlockSize - 1) / BlockSize * BlockSize;
            tar.Position = Math.Min(dataStart + paddedSize, tar.Length);
        }
    }

    // Decodes a NUL-terminated header field; UTF-8 is used because it is a superset of ASCII.
    private static string ReadField(byte[] header, int offset, int length)
    {
        int terminator = Array.IndexOf(header, (byte)0, offset, length);
        int count = terminator < 0 ? length : terminator - offset;
        return Encoding.UTF8.GetString(header, offset, count);
    }

    // Parses an octal number field; a blank field counts as zero.
    private static long ParseOctal(string field)
    {
        string digits = field.Trim();
        return digits.Length == 0 ? 0 : Convert.ToInt64(digits, 8);
    }

    // Returns true when candidateFullPath is rootFullPath itself or lies below it.
    private static bool IsInsideDirectory(string rootFullPath, string candidateFullPath)
    {
        // Compare case-insensitively only where the separator indicates Windows-style paths.
        StringComparison comparison = Path.DirectorySeparatorChar == '\\'
            ? StringComparison.OrdinalIgnoreCase
            : StringComparison.Ordinal;

        string root = rootFullPath.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
        string candidate = candidateFullPath.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);

        return String.Equals(candidate, root, comparison)
            || candidateFullPath.StartsWith(root + Path.DirectorySeparatorChar, comparison);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment