Skip to content

Instantly share code, notes, and snippets.

@brianmario
Created June 21, 2013 18:43
Show Gist options
  • Save brianmario/5833373 to your computer and use it in GitHub Desktop.
Save brianmario/5833373 to your computer and use it in GitHub Desktop.
[WIP] Simple libbz2 wrapper using FFI
require 'ffi'
module Bzip2
# Pull in FFI's DSL (ffi_lib / attach_function) and bind against the 'bz2' library.
extend FFI::Library
ffi_lib 'bz2'
# Error hierarchy: every error class defined here descends from Bzip2::Error,
# which itself is a StandardError.
class Error < StandardError; end
class ConfigError < Error; end
class BufferError < Error; end
class CorruptError < Error; end
# Default for the blksize argument of Bzip2.compress.
DEFAULT_BLK_SIZE = 3
# Action codes.
# NOTE(review): presumably these mirror the BZ_RUN / BZ_FLUSH / BZ_FINISH actions
# declared in bzlib.h; nothing in the code shown in this file references them — confirm.
BZ_RUN = 0
BZ_FLUSH = 1
BZ_FINISH = 2
# Status codes, compared against the :int returned by the attached libbz2 functions.
# Non-negative codes:
BZ_OK = 0
BZ_RUN_OK = 1
BZ_FLUSH_OK = 2
BZ_FINISH_OK = 3
BZ_STREAM_END = 4
# Negative codes (the error / failure conditions):
BZ_SEQUENCE_ERROR = -1
BZ_PARAM_ERROR = -2
BZ_MEM_ERROR = -3
BZ_DATA_ERROR = -4
BZ_DATA_ERROR_MAGIC = -5
BZ_IO_ERROR = -6
BZ_UNEXPECTED_EOF = -7
BZ_OUTBUFF_FULL = -8
BZ_CONFIG_ERROR = -9
# blksize100k - Should be a value between 1 and 9 inclusive, and the actual block size used
# is 100000 x this figure. 9 gives the best compression but takes most memory.
# verbosity - Integer between 0-4. If the library has been compiled with -DBZ_NO_STDIO,
# no such output will appear for any verbosity setting.
# workFactor - Controls how the compression phase behaves when presented with worst case,
# highly repetitive, input data. If compression runs into difficulties caused
# by repetitive data, the library switches from the standard sorting algorithm
# to a fallback algorithm. The fallback is slower than the standard algorithm
# by perhaps a factor of three, but always behaves reasonably, no matter how bad
# the input.
# Lower values of workFactor reduce the amount of effort the standard algorithm
# will expend before resorting to the fallback. You should set this parameter
# carefully; too low, and many inputs will be handled by the fallback algorithm
# and so compress rather slowly, too high, and your average-to-worst case
# compression times can become very large. The default value of 30 gives reasonable
# behaviour over a wide range of circumstances.
# Allowable values range from 0 to 250 inclusive. 0 is a special case, equivalent
# to using the default value of 30.
# Note that the compressed output generated is the same regardless of whether or not
# the fallback algorithm is used.
# Be aware also that this parameter may disappear entirely in future versions of the
# library. In principle it should be possible to devise a good way to automatically
# choose which algorithm to use. Such a mechanism would render the parameter obsolete.
#
# Possible return values:
#
# BZ_CONFIG_ERROR
# if the library has been mis-compiled
# BZ_PARAM_ERROR
# if dest is NULL or destLen is NULL
# or blockSize100k < 1 or blockSize100k > 9
# or verbosity < 0 or verbosity > 4
# or workFactor < 0 or workFactor > 250
# BZ_MEM_ERROR
# if insufficient memory is available
# BZ_OUTBUFF_FULL
# if the size of the compressed data exceeds *destLen
# BZ_OK
# otherwise
# FFI binding for the one-shot compressor. The call returns an :int status that
# Bzip2.compress matches against the BZ_* constants.
attach_function :BZ2_bzBuffToBuffCompress,
[:pointer, :buffer_inout, :pointer, :uint32, :int, :int, :int ], :int
# Argument order:
# dst, dst_len, src, src_len, blksize100k, verbosity, workFactor
# dst_len is an in/out pointer to a uint32: the caller writes the capacity of dst
# into it before the call and reads the produced length back afterwards.
# Compress a String in one shot with BZ2_bzBuffToBuffCompress.
#
# data        - String holding the bytes to compress.
# blksize     - Block size in units of 100k; values outside 1..9 are clamped.
# verbosity   - libbz2 verbosity level; values outside 0..4 are clamped.
# work_factor - libbz2 workFactor; values outside 0..250 are clamped.
#
# Returns a String containing the compressed bytes.
# Raises ArgumentError on BZ_PARAM_ERROR, NoMemoryError on BZ_MEM_ERROR,
# BufferError on BZ_OUTBUFF_FULL, ConfigError on BZ_CONFIG_ERROR and Error
# for any other non-OK return code.
def self.compress(data, blksize=DEFAULT_BLK_SIZE, verbosity=0, work_factor=0)
  # Massage parameter values
  blksize = 1 if blksize < 1
  blksize = 9 if blksize > 9
  verbosity = 0 if verbosity < 0
  verbosity = 4 if verbosity > 4
  work_factor = 0 if work_factor < 0
  work_factor = 250 if work_factor > 250

  # Initialize our work buffers
  # To guarantee that the compressed data will fit in its buffer, allocate an output
  # buffer of size 1% larger than the uncompressed data, plus six hundred extra bytes.
  # The 1% is rounded UP using integer arithmetic so the length stays an Integer
  # (no implicit Float truncation) and never falls below that bound.
  src_len = data.bytesize
  out_len = src_len + (src_len + 99) / 100 + 600

  dst_buf = FFI::MemoryPointer.new(:char, out_len)
  dst_len = FFI::MemoryPointer.new(:uint32)
  dst_len.write_uint32(out_len)

  src_buf = FFI::MemoryPointer.new(:char, src_len)
  src_buf.put_bytes(0, data)

  # Do the work
  ret = BZ2_bzBuffToBuffCompress(dst_buf, dst_len, src_buf, src_len,
                                 blksize, verbosity, work_factor)

  # Check the return value
  case ret
  when BZ_OK
    # dst_len now holds the number of bytes actually produced
    dst_buf.read_bytes(dst_len.read_uint32)
  when BZ_PARAM_ERROR
    raise ArgumentError, "One of blksize, verbosity or work_factor is out of range"
  when BZ_MEM_ERROR
    raise NoMemoryError, "Out of memory"
  when BZ_OUTBUFF_FULL
    raise BufferError, "Output buffer isn't large enough"
  when BZ_CONFIG_ERROR
    raise ConfigError, "libbz2 has been mis-compiled"
  else
    raise Error, "Unhandled error code: #{ret}"
  end
end
# small - If nonzero, the library will use an alternative decompression algorithm which
# uses less memory but at the cost of decompressing more slowly (roughly speaking,
# half the speed, but the maximum memory requirement drops to around 2300k).
# verbosity - Integer between 0-4. If the library has been compiled with -DBZ_NO_STDIO,
# no such output will appear for any verbosity setting.
#
# Possible return values:
#
# BZ_CONFIG_ERROR
# if the library has been mis-compiled
# BZ_PARAM_ERROR
# if dest is NULL or destLen is NULL
# or small != 0 && small != 1
# or verbosity < 0 or verbosity > 4
# BZ_MEM_ERROR
# if insufficient memory is available
# BZ_OUTBUFF_FULL
# if the size of the decompressed data exceeds *destLen
# BZ_DATA_ERROR
# if a data integrity error was detected in the compressed data
# BZ_DATA_ERROR_MAGIC
# if the compressed data doesn't begin with the right magic bytes
# BZ_UNEXPECTED_EOF
# if the compressed data ends unexpectedly
# BZ_OK
# otherwise
# FFI binding for the one-shot decompressor. The call returns an :int status that
# Bzip2.decompress matches against the BZ_* constants.
attach_function :BZ2_bzBuffToBuffDecompress,
[:pointer, :buffer_inout, :pointer, :uint32, :int, :int ], :int
# Argument order:
# dst, dst_len, src, src_len, small, verbosity
# dst_len is an in/out pointer to a uint32: the caller writes the capacity of dst
# into it before the call and reads the produced length back afterwards.
# Decompress a bzip2-compressed String in one shot with BZ2_bzBuffToBuffDecompress.
#
# The decompressed size is not known up front, so the output buffer starts at
# ten times the input size (at least 1024 bytes) and is enlarged and the call
# retried whenever libbz2 reports BZ_OUTBUFF_FULL, up to max_out_len bytes.
#
# data        - String holding the compressed bytes.
# small       - Nonzero selects libbz2's slower, lower-memory algorithm; the value
#               is normalised to 0 or 1, the only values libbz2 accepts.
# verbosity   - libbz2 verbosity level; values outside 0..4 are clamped.
# max_out_len - Upper bound, in bytes, for the output buffer. Defaults to (and is
#               capped at) 0xFFFF_FFFF because the length is passed as a uint32.
#               Pass something smaller when handling untrusted input: a very small
#               bzip2 stream can expand to an enormous output.
#
# Returns a String containing the decompressed bytes.
# Raises ArgumentError on BZ_PARAM_ERROR, NoMemoryError on BZ_MEM_ERROR,
# BufferError when the output does not fit within max_out_len, CorruptError on
# BZ_DATA_ERROR / BZ_DATA_ERROR_MAGIC / BZ_UNEXPECTED_EOF, ConfigError on
# BZ_CONFIG_ERROR and Error for any other non-OK return code.
def self.decompress(data, small=0, verbosity=0, max_out_len=0xFFFF_FFFF)
  # Massage parameter values
  small = 0 if small < 0
  small = 1 if small > 1
  verbosity = 0 if verbosity < 0
  verbosity = 4 if verbosity > 4
  max_out_len = 0 if max_out_len < 0
  max_out_len = 0xFFFF_FFFF if max_out_len > 0xFFFF_FFFF

  # Initialize our work buffers; the source is copied once and reused across retries
  src_len = data.bytesize
  src_buf = FFI::MemoryPointer.new(:char, src_len)
  src_buf.put_bytes(0, data)
  dst_len = FFI::MemoryPointer.new(:uint32)

  # First guess. The floor keeps the size non-zero so that growing it makes
  # progress, and so that empty input is reported as corrupt instead of as a
  # full output buffer.
  out_len = [[src_len * 10, 1024].max, max_out_len].min

  dst_buf = nil
  ret = nil
  loop do
    dst_buf = FFI::MemoryPointer.new(:char, out_len)
    dst_len.write_uint32(out_len)
    ret = BZ2_bzBuffToBuffDecompress(dst_buf, dst_len, src_buf, src_len,
                                     small, verbosity)
    # Stop on success, on any other error, or once the cap has been tried
    break unless ret == BZ_OUTBUFF_FULL && out_len < max_out_len
    out_len = [out_len * 4, max_out_len].min
  end

  # Check the return value
  case ret
  when BZ_OK
    # dst_len now holds the number of bytes actually produced
    dst_buf.read_bytes(dst_len.read_uint32)
  when BZ_PARAM_ERROR
    raise ArgumentError, "One of small or verbosity is out of range"
  when BZ_MEM_ERROR
    raise NoMemoryError, "Out of memory"
  when BZ_OUTBUFF_FULL
    raise BufferError, "Output buffer isn't large enough"
  when BZ_DATA_ERROR, BZ_DATA_ERROR_MAGIC, BZ_UNEXPECTED_EOF
    raise CorruptError, "Compressed data appears to be corrupt or unreadable"
  when BZ_CONFIG_ERROR
    raise ConfigError, "libbz2 has been mis-compiled"
  else
    raise Error, "Unhandled error code: #{ret}"
  end
end
# FFI binding for BZ2_bzlibVersion: takes no arguments and returns a C string.
attach_function :BZ2_bzlibVersion, [], :string
# Queried once, when this module body is evaluated, and kept as a constant.
BZ2_VERSION = self.BZ2_bzlibVersion
# Returns the version string captured in BZ2_VERSION.
def self.bz2_version
BZ2_VERSION
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment