Skip to content

Instantly share code, notes, and snippets.

@julian-savage
Created August 7, 2016 13:23
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save julian-savage/ed81f01c1f3e1bf1c92d3d105d14d459 to your computer and use it in GitHub Desktop.
Save julian-savage/ed81f01c1f3e1bf1c92d3d105d14d459 to your computer and use it in GitHub Desktop.
InputStream which decompresses gzip/zlib compressed data
import Foundation
import Compression
/// An `InputStream` subclass whose stream operations are forwarded to a `Decompressor`
/// wrapping a second `InputStream` over the same compressed source.
public class GzipInputStream : InputStream {
    private var decompressor : Decompressor

    /// Creates a stream that reads the compressed bytes held in `data`.
    override public init(data: Data) {
        decompressor = Decompressor(InputStream(data: data))
        super.init(data: data)
    }

    /// Creates a stream that reads the compressed bytes located at `url`.
    /// Fails (returns nil) when an `InputStream` cannot be created for `url`.
    override public init?(url: URL) {
        guard let source = InputStream(url: url) else {
            return nil
        }
        decompressor = Decompressor(source)
        super.init(url: url)
    }

    /// Opens the wrapped compressed stream.
    override public func open() {
        decompressor.open()
    }

    /// Closes the wrapped compressed stream.
    override public func close() {
        decompressor.close()
    }

    /// Reads up to `len` bytes into `buffer`, which must be at least `len` bytes in size.
    /// Returns whatever the decompressor reports for the read.
    override public func read(_ buffer: UnsafeMutablePointer<UInt8>, maxLength len: Int) -> Int {
        return decompressor.read(buffer, maxLength: len)
    }

    /// Direct buffer access is not implemented: data is decompressed straight into the
    /// buffer supplied to `read`, and handing out the compressed bytes would not make
    /// much sense. Always returns false.
    override public func getBuffer(_ buffer: UnsafeMutablePointer<UnsafeMutablePointer<UInt8>?>, length len: UnsafeMutablePointer<Int>) -> Bool {
        return false
    }

    /// Whether the decompressor reports that bytes are (or may be) available.
    override public var hasBytesAvailable: Bool {
        return decompressor.hasBytesAvailable
    }
}
/// Streaming decoder that sits between a compressed `InputStream` and the caller's buffer.
///
/// The main functionality lives in this separate class to avoid duplicating it across the
/// initializers of the parent stream class.
private class Decompressor {
    static let READ_BUFFER_SIZE = 8 * 1024

    // RFC 1952 - GZIP constants
    static let GZIP_MINIMUM_HEADER_SIZE = 10
    static let GZIP_ID1 : UInt8 = 0x1f
    static let GZIP_ID2 : UInt8 = 0x8b
    static let GZIP_FHCRC_FLAG : UInt8 = 2
    static let GZIP_FEXTRA_FLAG : UInt8 = 4
    static let GZIP_FNAME_FLAG : UInt8 = 8
    static let GZIP_FCOMMENT_FLAG : UInt8 = 16

    var compressedInputStream : InputStream
    var skipHeader : Bool
    var readBuffer : UnsafeMutablePointer<UInt8>
    var compressionStream : UnsafeMutablePointer<compression_stream>
    var compressionStatus : compression_status

    /// Wraps `compressedInputStream` and sets up a COMPRESSION_ZLIB decode stream with an
    /// initially empty source buffer.
    init(_ compressedInputStream : InputStream) {
        self.compressedInputStream = compressedInputStream
        self.skipHeader = true
        self.readBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: Decompressor.READ_BUFFER_SIZE)
        self.compressionStream = UnsafeMutablePointer<compression_stream>.allocate(capacity: 1)
        self.compressionStatus = compression_stream_init(self.compressionStream, COMPRESSION_STREAM_DECODE, COMPRESSION_ZLIB)
        self.compressionStream.pointee.src_ptr = UnsafePointer<UInt8>(self.readBuffer)
        self.compressionStream.pointee.src_size = 0
    }

    /// Opens the underlying compressed stream.
    func open() {
        compressedInputStream.open()
    }

    /// Closes the underlying compressed stream.
    func close() {
        compressedInputStream.close()
    }

    deinit {
        compression_stream_destroy(compressionStream)
        // Both allocations hold trivial (C) values, so there is nothing to deinitialize;
        // the memory itself must be released or every instance leaks it.
        readBuffer.deallocate()
        compressionStream.deallocate()
    }

    /// Measures the gzip member header (RFC 1952) at the start of `bytes`.
    ///
    /// - Parameters:
    ///   - bytes: Start of the compressed data.
    ///   - count: Number of valid bytes at `bytes`.
    /// - Returns: The number of header bytes to skip; 0 when the data does not begin with a
    ///   complete fixed gzip header; nil when an optional header field runs past `count`.
    static func gzipHeaderLength(_ bytes: UnsafePointer<UInt8>, count: Int) -> Int? {
        guard count >= GZIP_MINIMUM_HEADER_SIZE, bytes[0] == GZIP_ID1, bytes[1] == GZIP_ID2 else {
            return 0
        }
        let flags = bytes[3]
        var offset = GZIP_MINIMUM_HEADER_SIZE

        // FEXTRA: two-byte little-endian length followed by that many bytes
        if flags & GZIP_FEXTRA_FLAG != 0 {
            guard offset + 2 <= count else { return nil }
            let extraLength = Int(bytes[offset]) | (Int(bytes[offset + 1]) << 8)
            offset += 2 + extraLength
            guard offset <= count else { return nil }
        }

        // FNAME then FCOMMENT: each is a zero-terminated string
        for flag in [GZIP_FNAME_FLAG, GZIP_FCOMMENT_FLAG] where flags & flag != 0 {
            while offset < count && bytes[offset] != 0 {
                offset += 1
            }
            guard offset < count else { return nil }
            offset += 1
        }

        // FHCRC: two-byte header checksum
        if flags & GZIP_FHCRC_FLAG != 0 {
            offset += 2
            guard offset <= count else { return nil }
        }
        return offset
    }

    /// Decompresses up to `len` bytes into `buffer`.
    ///
    /// - Parameters:
    ///   - buffer: Destination, which must be at least `len` bytes in size.
    ///   - len: Maximum number of bytes to produce.
    /// - Returns: The number of bytes written to `buffer`, or -1 when nothing could be
    ///   produced because of a decode error, a read error on the compressed stream, a
    ///   malformed gzip header, or compressed data that ends before the decoder is done.
    func read(_ buffer: UnsafeMutablePointer<UInt8>, maxLength len: Int) -> Int {
        var currentBufferPosition = buffer
        while (currentBufferPosition - buffer) < len && compressionStatus == COMPRESSION_STATUS_OK {
            // Start by running the decoder to drain anything it can still produce
            compressionStream.pointee.dst_ptr = currentBufferPosition
            compressionStream.pointee.dst_size = len - (currentBufferPosition - buffer)
            compressionStatus = compression_stream_process(compressionStream, 0)
            guard compressionStatus != COMPRESSION_STATUS_ERROR else {
                return -1
            }

            // compression_stream_process advances dst_ptr, which gives the amount produced
            let decompressedLength = compressionStream.pointee.dst_ptr - currentBufferPosition
            currentBufferPosition = compressionStream.pointee.dst_ptr

            // Nothing came out and the decoder is not finished: feed it more compressed data
            if decompressedLength == 0 && compressionStatus == COMPRESSION_STATUS_OK {
                let bytesRead = compressedInputStream.read(readBuffer, maxLength: Decompressor.READ_BUFFER_SIZE)
                guard bytesRead > 0 else {
                    // 0 means the compressed data ended while the decoder still expects input
                    // (looping again would never terminate); negative means the read failed.
                    compressionStatus = COMPRESSION_STATUS_ERROR
                    break
                }

                var bytesSkipped = 0
                // gzip created files have headers which compression_stream_process doesn't understand;
                // they also have trailers, which are never fed to the decoder once it reports the end
                if skipHeader {
                    // Remember we've dealt with the header
                    skipHeader = false
                    guard let headerLength = Decompressor.gzipHeaderLength(readBuffer, count: bytesRead) else {
                        compressionStatus = COMPRESSION_STATUS_ERROR
                        break
                    }
                    bytesSkipped = headerLength
                }
                compressionStream.pointee.src_ptr = UnsafePointer<UInt8>(readBuffer) + bytesSkipped
                compressionStream.pointee.src_size = bytesRead - bytesSkipped
            }
        }

        // Hand back whatever was decoded before a failure; the failure itself is reported by
        // the next call, which finds the error status and has produced nothing.
        let producedLength = currentBufferPosition - buffer
        if producedLength == 0 && compressionStatus == COMPRESSION_STATUS_ERROR {
            return -1
        }
        return producedLength
    }

    /// Direct buffer access is not supported; always returns false.
    func getBuffer(_ buffer: UnsafeMutablePointer<UnsafeMutablePointer<UInt8>?>, length len: UnsafeMutablePointer<Int>) -> Bool {
        return false
    }

    var hasBytesAvailable: Bool {
        // Will always return true for the first call even if compressedInputStream will not return data, which is safe for InputStream where hasBytesAvailable may return true if it doesn't know if read() will succeed
        return compressionStatus == COMPRESSION_STATUS_OK
    }
}
@sebouh00
Copy link

sebouh00 commented Feb 9, 2023

Hi. What is the license associated with this code? May I use it in a private/closed-source project?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment