Use pure C# to extract .tar and .tar.gz files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.IO; | |
using System.IO.Compression; | |
using System.Text; | |
namespace TarExample | |
{ | |
public class Tar | |
{ | |
/// <summary> | |
/// Extracts a <i>.tar.gz</i> archive to the specified directory. | |
/// </summary> | |
/// <param name="filename">The <i>.tar.gz</i> to decompress and extract.</param> | |
/// <param name="outputDir">Output directory to write the files.</param> | |
public static void ExtractTarGz(string filename, string outputDir) | |
{ | |
using (var stream = File.OpenRead(filename)) | |
ExtractTarGz(stream, outputDir); | |
} | |
/// <summary> | |
/// Extracts a <i>.tar.gz</i> archive stream to the specified directory. | |
/// </summary> | |
/// <param name="stream">The <i>.tar.gz</i> to decompress and extract.</param> | |
/// <param name="outputDir">Output directory to write the files.</param> | |
public static void ExtractTarGz(Stream stream, string outputDir) | |
{ | |
using (var gzip = new GZipStream(stream, CompressionMode.Decompress)) | |
{ | |
// removed convertation to MemoryStream | |
ExtractTar(gzip, outputDir); | |
} | |
} | |
/// <summary> | |
/// Extractes a <c>tar</c> archive to the specified directory. | |
/// </summary> | |
/// <param name="filename">The <i>.tar</i> to extract.</param> | |
/// <param name="outputDir">Output directory to write the files.</param> | |
public static void ExtractTar(string filename, string outputDir) | |
{ | |
using (var stream = File.OpenRead(filename)) | |
ExtractTar(stream, outputDir); | |
} | |
/// <summary> | |
/// Extractes a <c>tar</c> archive to the specified directory. | |
/// </summary> | |
/// <param name="stream">The <i>.tar</i> to extract.</param> | |
/// <param name="outputDir">Output directory to write the files.</param> | |
public static void ExtractTar(Stream stream, string outputDir) | |
{ | |
var buffer = new byte[100]; | |
// store current position here | |
long pos = 0; | |
while (true) | |
{ | |
pos += stream.Read(buffer, 0, 100); | |
var name = Encoding.ASCII.GetString(buffer).Trim('\0'); | |
if (String.IsNullOrWhiteSpace(name)) | |
break; | |
FakeSeekForward(stream, 24); | |
pos += 24; | |
pos += stream.Read(buffer, 0, 12); | |
var size = Convert.ToInt64(Encoding.UTF8.GetString(buffer, 0, 12).Trim('\0').Trim(), 8); | |
FakeSeekForward(stream, 376); | |
pos += 376; | |
var output = Path.Combine(outputDir, name); | |
if (!Directory.Exists(Path.GetDirectoryName(output))) | |
Directory.CreateDirectory(Path.GetDirectoryName(output)); | |
if (!name.Equals("./", StringComparison.InvariantCulture)) | |
{ | |
using (var str = File.Open(output, FileMode.OpenOrCreate, FileAccess.Write)) | |
{ | |
var buf = new byte[size]; | |
pos += stream.Read(buf, 0, buf.Length); | |
str.Write(buf, 0, buf.Length); | |
} | |
} | |
var offset = (int)(512 - (pos % 512)); | |
if (offset == 512) | |
offset = 0; | |
FakeSeekForward(stream, offset); | |
pos += offset; | |
} | |
} | |
private static void FakeSeekForward(Stream stream, int offset) | |
{ | |
if (stream.CanSeek) | |
stream.Seek(offset, SeekOrigin.Current); | |
else | |
{ | |
int bytesRead = 0; | |
var buffer = new byte[offset]; | |
while (bytesRead < offset) | |
{ | |
int read = stream.Read(buffer, bytesRead, offset - bytesRead); | |
if (read == 0) | |
throw new EndOfStreamException(); | |
bytesRead += read; | |
} | |
} | |
} | |
} | |
} |
Thx for the update.
But it still did not help me, because it doesnt support single files bigger than 2GB.
the line
var buf = new byte[size];
throws an OverflowException when called with bigger values.
single byte array max size is that
https://stackoverflow.com/questions/3944320/maximum-length-of-byte/53967254
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Modified the tgz decompression to support the tgz files of actual size more than 2GB