Created
June 21, 2013 18:43
-
-
Save brianmario/5833373 to your computer and use it in GitHub Desktop.
[WIP] Simple libbz2 wrapper using FFI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'ffi' | |
# Minimal FFI binding for libbz2's one-shot (buffer-to-buffer) API.
#
#   compressed = Bzip2.compress("some data")
#   Bzip2.decompress(compressed) # => "some data"
module Bzip2
  extend FFI::Library
  ffi_lib 'bz2'

  # Base class for every error raised by this wrapper.
  class Error < StandardError; end
  # libbz2 reported that it was mis-compiled (BZ_CONFIG_ERROR).
  class ConfigError < Error; end
  # The output buffer could not hold the result (BZ_OUTBUFF_FULL).
  class BufferError < Error; end
  # The compressed input is damaged, truncated or not bzip2 data at all.
  class CorruptError < Error; end

  DEFAULT_BLK_SIZE = 3

  # Both length arguments of the buffer-to-buffer calls are bound as :uint32,
  # so no buffer handed to libbz2 may be larger than this.
  MAX_BUFFER_SIZE = 0xFFFF_FFFF

  # Smallest output buffer decompress starts with. Without a floor, an empty
  # input would produce a zero-byte buffer and be reported as "buffer full"
  # instead of as corrupt data.
  MIN_DECOMPRESS_BUFFER = 4096

  # Actions
  BZ_RUN    = 0
  BZ_FLUSH  = 1
  BZ_FINISH = 2

  # Return codes
  BZ_OK               = 0
  BZ_RUN_OK           = 1
  BZ_FLUSH_OK         = 2
  BZ_FINISH_OK        = 3
  BZ_STREAM_END       = 4
  BZ_SEQUENCE_ERROR   = -1
  BZ_PARAM_ERROR      = -2
  BZ_MEM_ERROR        = -3
  BZ_DATA_ERROR       = -4
  BZ_DATA_ERROR_MAGIC = -5
  BZ_IO_ERROR         = -6
  BZ_UNEXPECTED_EOF   = -7
  BZ_OUTBUFF_FULL     = -8
  BZ_CONFIG_ERROR     = -9

  # blksize100k - Should be a value between 1 and 9 inclusive, and the actual block size used
  #               is 100000 x this figure. 9 gives the best compression but takes most memory.
  # verbosity   - Integer between 0-4. If the library has been compiled with -DBZ_NO_STDIO,
  #               no such output will appear for any verbosity setting.
  # workFactor  - Controls how the compression phase behaves when presented with worst case,
  #               highly repetitive, input data. If compression runs into difficulties caused
  #               by repetitive data, the library switches from the standard sorting algorithm
  #               to a fallback algorithm. The fallback is slower than the standard algorithm
  #               by perhaps a factor of three, but always behaves reasonably, no matter how bad
  #               the input.
  #               Lower values of workFactor reduce the amount of effort the standard algorithm
  #               will expend before resorting to the fallback. You should set this parameter
  #               carefully; too low, and many inputs will be handled by the fallback algorithm
  #               and so compress rather slowly, too high, and your average-to-worst case
  #               compression times can become very large. The default value of 30 gives reasonable
  #               behaviour over a wide range of circumstances.
  #               Allowable values range from 0 to 250 inclusive. 0 is a special case, equivalent
  #               to using the default value of 30.
  #               Note that the compressed output generated is the same regardless of whether or not
  #               the fallback algorithm is used.
  #               Be aware also that this parameter may disappear entirely in future versions of the
  #               library. In principle it should be possible to devise a good way to automatically
  #               choose which algorithm to use. Such a mechanism would render the parameter obsolete.
  #
  # Possible return values:
  #
  #   BZ_CONFIG_ERROR
  #     if the library has been mis-compiled
  #   BZ_PARAM_ERROR
  #     if dest is NULL or destLen is NULL
  #     or blockSize100k < 1 or blockSize100k > 9
  #     or verbosity < 0 or verbosity > 4
  #     or workFactor < 0 or workFactor > 250
  #   BZ_MEM_ERROR
  #     if insufficient memory is available
  #   BZ_OUTBUFF_FULL
  #     if the size of the compressed data exceeds *destLen
  #   BZ_OK
  #     otherwise
  attach_function :BZ2_bzBuffToBuffCompress,
                  [:pointer, :buffer_inout, :pointer, :uint32, :int, :int, :int], :int
  #                dst,      dst_len,       src,      src_len, blksize100k, verbosity, workFactor

  # Compress a String in one shot.
  #
  # data        - String of bytes to compress.
  # blksize     - Integer block size (1..9); out-of-range values are clamped.
  # verbosity   - Integer verbosity (0..4); out-of-range values are clamped.
  # work_factor - Integer work factor (0..250); out-of-range values are clamped.
  #
  # Returns the compressed bytes as a String.
  # Raises ArgumentError if the input is too large for a 32-bit length or
  #   libbz2 rejects a parameter, NoMemoryError, BufferError, ConfigError,
  #   or Error for any other return code.
  def self.compress(data, blksize=DEFAULT_BLK_SIZE, verbosity=0, work_factor=0)
    # Massage parameter values
    blksize     = clamp_param(blksize, 1, 9)
    verbosity   = clamp_param(verbosity, 0, 4)
    work_factor = clamp_param(work_factor, 0, 250)

    # To guarantee that the compressed data will fit in its buffer, allocate an output
    # buffer of size 1% larger than the uncompressed data, plus six hundred extra bytes.
    # Integer arithmetic (rounding the 1% up) keeps the size exact instead of
    # passing a Float to FFI.
    src_len = data.bytesize
    out_len = src_len + (src_len + 99) / 100 + 600
    if out_len > MAX_BUFFER_SIZE
      raise ArgumentError, "Input is too large to compress in a single call"
    end

    # Initialize our work buffers
    dst_buf = FFI::MemoryPointer.new(:char, out_len)
    dst_len = FFI::MemoryPointer.new(:uint32)
    dst_len.write_uint32(out_len)
    src_buf = FFI::MemoryPointer.new(:char, src_len)
    src_buf.put_bytes(0, data)

    # Do the work
    ret = BZ2_bzBuffToBuffCompress(dst_buf, dst_len, src_buf, src_len,
                                   blksize, verbosity, work_factor)

    # On success libbz2 has overwritten dst_len with the number of bytes written.
    return dst_buf.read_bytes(dst_len.read_uint32) if ret == BZ_OK

    raise_for_status(ret, "One of blksize, verbosity or work_factor is out of range")
  end

  # small     - If nonzero, the library will use an alternative decompression algorithm which
  #             uses less memory but at the cost of decompressing more slowly (roughly speaking,
  #             half the speed, but the maximum memory requirement drops to around 2300k).
  # verbosity - Integer between 0-4. If the library has been compiled with -DBZ_NO_STDIO,
  #             no such output will appear for any verbosity setting.
  #
  # Possible return values:
  #
  #   BZ_CONFIG_ERROR
  #     if the library has been mis-compiled
  #   BZ_PARAM_ERROR
  #     if dest is NULL or destLen is NULL
  #     or small != 0 && small != 1
  #     or verbosity < 0 or verbosity > 4
  #   BZ_MEM_ERROR
  #     if insufficient memory is available
  #   BZ_OUTBUFF_FULL
  #     if the size of the decompressed data exceeds *destLen
  #   BZ_DATA_ERROR
  #     if a data integrity error was detected in the compressed data
  #   BZ_DATA_ERROR_MAGIC
  #     if the compressed data doesn't begin with the right magic bytes
  #   BZ_UNEXPECTED_EOF
  #     if the compressed data ends unexpectedly
  #   BZ_OK
  #     otherwise
  attach_function :BZ2_bzBuffToBuffDecompress,
                  [:pointer, :buffer_inout, :pointer, :uint32, :int, :int], :int
  #                dst,      dst_len,       src,      src_len, small, verbosity

  # Decompress a bzip2 String in one shot.
  #
  # The one-shot API gives no way to learn the decompressed size up front, so
  # the output buffer starts at ten times the input size and is enlarged and
  # the call retried whenever libbz2 reports BZ_OUTBUFF_FULL, up to
  # MAX_BUFFER_SIZE. NOTE: for untrusted input this means a small payload can
  # trigger large allocations.
  #
  # data      - String of bzip2-compressed bytes.
  # small     - Integer flag; values <= 0 select the normal algorithm, any
  #             positive value the low-memory one.
  # verbosity - Integer verbosity (0..4); out-of-range values are clamped.
  #
  # Returns the decompressed bytes as a String.
  # Raises CorruptError for damaged or truncated input, BufferError if the
  #   result does not fit in MAX_BUFFER_SIZE bytes, ArgumentError,
  #   NoMemoryError, ConfigError, or Error for any other return code.
  def self.decompress(data, small=0, verbosity=0)
    # Massage parameter values. libbz2 only accepts 0 or 1 for small.
    small     = small > 0 ? 1 : 0
    verbosity = clamp_param(verbosity, 0, 4)

    src_len = data.bytesize
    if src_len > MAX_BUFFER_SIZE
      raise ArgumentError, "Input is too large to decompress in a single call"
    end

    # Initialize our work buffers
    src_buf = FFI::MemoryPointer.new(:char, src_len)
    src_buf.put_bytes(0, data)
    dst_len = FFI::MemoryPointer.new(:uint32)
    out_len = clamp_param(src_len * 10, MIN_DECOMPRESS_BUFFER, MAX_BUFFER_SIZE)

    loop do
      dst_buf = FFI::MemoryPointer.new(:char, out_len)
      # dst_len is in/out: capacity going in, bytes written coming out, so it
      # has to be reset before every attempt.
      dst_len.write_uint32(out_len)

      ret = BZ2_bzBuffToBuffDecompress(dst_buf, dst_len, src_buf, src_len,
                                       small, verbosity)
      return dst_buf.read_bytes(dst_len.read_uint32) if ret == BZ_OK

      # Anything other than "buffer too small" is final, and so is a full
      # buffer once the 32-bit size limit has been reached.
      unless ret == BZ_OUTBUFF_FULL && out_len < MAX_BUFFER_SIZE
        raise_for_status(ret, "One of small or verbosity is out of range")
      end

      out_len = [out_len * 4, MAX_BUFFER_SIZE].min
    end
  end

  # Restrict value to the inclusive range min..max.
  def self.clamp_param(value, min, max)
    [[value, min].max, max].min
  end
  private_class_method :clamp_param

  # Translate a non-BZ_OK libbz2 return code into the matching Ruby exception.
  # param_message is the text used when the code is BZ_PARAM_ERROR.
  def self.raise_for_status(ret, param_message)
    case ret
    when BZ_PARAM_ERROR
      raise ArgumentError, param_message
    when BZ_MEM_ERROR
      raise NoMemoryError, "Out of memory"
    when BZ_OUTBUFF_FULL
      raise BufferError, "Output buffer isn't large enough"
    when BZ_DATA_ERROR, BZ_DATA_ERROR_MAGIC, BZ_UNEXPECTED_EOF
      raise CorruptError, "Compressed data appears to be corrupt or unreadable"
    when BZ_CONFIG_ERROR
      raise ConfigError, "libbz2 has been mis-compiled"
    else
      raise Error, "Unhandled error code: #{ret}"
    end
  end
  private_class_method :raise_for_status

  attach_function :BZ2_bzlibVersion, [], :string

  # Version string reported by the loaded libbz2, read once at load time.
  BZ2_VERSION = self.BZ2_bzlibVersion

  # Returns the version String of the loaded libbz2.
  def self.bz2_version
    BZ2_VERSION
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment