Skip to content

Instantly share code, notes, and snippets.

@RichardEllicott
Created May 8, 2021 21:32
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save RichardEllicott/c89c844fb6092133eb232f98be468b6f to your computer and use it in GitHub Desktop.
Save RichardEllicott/c89c844fb6092133eb232f98be468b6f to your computer and use it in GitHub Desktop.
# MIT License
#
# Copyright (c) 2018 Jelle Hermsen
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# +-------------------------------------+
# | _ _ |
# | __ _ __| |_ _ _ __ ___(_)_ __ |
# | / _` |/ _` | | | | '_ \|_ / | '_ \ |
# || (_| | (_| | |_| | | | |/ /| | |_) ||
# | \__, |\__,_|\__,_|_| |_/___|_| .__/ |
# | |___/ |_| |
# | |
# +-------------------------------------+
# | A small GDScript that allows you to |
# | browse and uncompress zip files. |
# +-------------------------------------+
# Example usage:
#
# - put gdunzip.gd somewhere in your Godot project
# - instance this scrip by using:
# var gdunzip = load('res://LOCATION_IN_LIBRARY/gdunzip.gd').new()
# - load a zip file:
# var loaded = gdunzip.load('res://PATH_TO_ZIP/test.zip')
# - if loaded is true you can try to uncompress a file:
# var uncompressed = gdunzip.uncompress('PATH_TO_FILE_IN_ZIP/test.txt')
# - now you have got a PoolByteArray named "uncompressed" with the
# uncompressed data for the given file
#
# You can iterate over the "files" variable from the gdunzip instance, to
# see all the available files:
# - for f in gdunzip.files:
# print(f['file_name'])
# The path of the currently loaded zip file
var path
# A PoolByteArray with the contents of the zip file
var buffer
# The size of the currently loaded buffer
var buffer_size
# A dictionary with the files in the currently loaded zip file.
# It maps full file paths to dictionaries with meta file information:
#
# - compression_method: -1 if uncompressed or File.COMPRESSION_DEFLATE
# - file_name: the full path of the compressed file inside the zip
# - file_header_offset: the exact byte location of this file's compressed data
# inside the zip file
# - compressed_size
# - uncompressed_size
var files = {}
# The current position we're at inside the buffer
var pos = 0
# An instance of the inner Tinf class, which is a GDScript port of Jørgen
# Ibsen's excellent "tiny inflate library"
var tinf
# Initialize the gdunzip class
func _init():
self.tinf = Tinf.new()
# Load a zip file with the given path. Returns a boolean denoting success.
func load(path):
if path == null:
return false
self.path = path
self.pos = 0
var file = File.new()
if !file.file_exists(path):
return false
file.open(path, File.READ)
var file_length = file.get_len()
if file.get_32() != 0x04034B50:
return false
file.seek(0)
self.buffer = file.get_buffer(file_length)
self.buffer_size = self.buffer.size()
file.close()
if self.buffer_size < 22:
# Definitely not a valid zip file
return false
# Fill in self.files with all the file data
return self._get_files()
# Uncompress the given file. Returns false if uncompressing fails, or when the
# file isn't available in the currently loaded zip file.
func uncompress(file_name):
if !(file_name in self.files):
return false
var f = self.files[file_name]
self.pos = f['file_header_offset']
self._skip_file_header()
var uncompressed = self._read(f['compressed_size'])
if f['compression_method'] == -1:
return uncompressed
return tinf.tinf_uncompress(f['uncompressed_size'], uncompressed)
# Returns a PoolByteArray with the compressed data for the given file.
# Returns false if it can't be found.
func get_compressed(file_name):
if !(file_name in self.files):
return false
var f = self.files[file_name]
self.pos = f['file_header_offset']
self._skip_file_header()
return self._read(f['compressed_size'])
# Parse the zip file's central directory and fill self.files with all the
# file info
func _get_files():
# Locate starting position of central directory
var eocd_offset = buffer.size() - 22
while (
eocd_offset > 0
&& not (buffer[eocd_offset+3] == 0x06
&& buffer[eocd_offset+2] == 0x05
&& buffer[eocd_offset+1] == 0x4B
&& buffer[eocd_offset] == 0x50
)
):
eocd_offset -= 1
# Set the central directory start offset
self.pos = (
buffer[eocd_offset + 19] << 24
| buffer[eocd_offset + 18] << 16
| buffer[eocd_offset + 17] << 8
| buffer[eocd_offset + 16]
)
# Get all central directory records, and fill self.files
# with all the file information
while (
buffer[pos + 3] == 0x02
&& buffer[pos + 2] == 0x01
&& buffer[pos + 1] == 0x4B
&& buffer[pos] == 0x50
):
var raw = _read(46)
var header = {
'compression_method': '',
'file_name': '',
'compressed_size': 0,
'uncompressed_size': 0,
'file_header_offset': -1,
}
if raw[10] == 0 && raw[11] == 0:
header['compression_method'] = -1
else:
header['compression_method'] = File.COMPRESSION_DEFLATE
header['compressed_size'] = (
raw[23] << 24
| raw[22] << 16
| raw[21] << 8
| raw[20]
)
header['uncompressed_size'] = (
raw[27] << 24
| raw[26] << 16
| raw[25] << 8
| raw[24]
)
header['file_header_offset'] = (
raw[45] << 24
| raw[44] << 16
| raw[43] << 8
| raw[42]
)
var file_name_length = raw[29] << 8 | raw[28]
var extra_field_length = raw[31] << 8 | raw[30]
var comment_length = raw[33] << 8 | raw[32]
var raw_end = _read(file_name_length + extra_field_length + comment_length)
if !raw_end:
return false
header['file_name'] = (
raw_end.subarray(0, file_name_length - 1).get_string_from_utf8()
)
self.files[header['file_name']] = header
return true
# Read a given number of bytes from the buffer, and return it as a
# PoolByteArray
func _read(length):
var result = buffer.subarray(pos, pos + length - 1)
if result.size() != length:
return false
pos = pos + length
return result
# Skip the given number of bytes in the buffer. Advancing "pos".
func _skip(length):
pos += length
# This function skips the file header information, when "pos" points at a file
# header inside the buffer.
func _skip_file_header():
var raw = _read(30)
if !raw:
return false
var file_name_length = raw[27] << 8 | raw[26]
var extra_field_length = raw[29] << 8 | raw[28]
var raw_end = _skip(file_name_length + extra_field_length)
# The inner Tinf class is a pretty straight port from Jørgen Ibsen's excellent
# "tiny inflate library". It's written in an imperative style and I have tried
# to stay close to the original. I added two helper functions, and had to make
# some minor additions to support "faux pointer arithmetic".
#
# I have created a TINF_TREE and TINF_DATA dictionary to serve as structs. I
# use "duplicate" to make an instance from these structs.
class Tinf:
# ----------------------------------------
# -- GDscript specific helper functions --
# ----------------------------------------
func make_pool_int_array(size):
var pool_int_array = PoolIntArray()
pool_int_array.resize(size)
return pool_int_array
func make_pool_byte_array(size):
var pool_byte_array = PoolByteArray()
pool_byte_array.resize(size)
return pool_byte_array
# ------------------------------
# -- internal data structures --
# ------------------------------
var TINF_TREE = {
'table': make_pool_int_array(16),
'trans': make_pool_int_array(288),
}
var TINF_DATA = {
'source': PoolByteArray(),
# sourcePtr is an "int" that's used to point at a location in "source".
# I added this since we don't have pointer arithmetic in GDScript.
'sourcePtr': 0,
'tag': 0,
'bitcount': 0,
'dest': PoolByteArray(),
'destLen': 0,
# "Faux pointer" to dest.
'destPtr': 0,
'ltree': TINF_TREE.duplicate(),
'dtree': TINF_TREE.duplicate()
}
const TINF_OK = 0
const TINF_DATA_ERROR = -3
# ---------------------------------------------------
# -- uninitialized global data (static structures) --
# ---------------------------------------------------
var sltree = TINF_TREE.duplicate() # fixed length/symbol tree
var sdtree = TINF_TREE.duplicate() # fixed distance tree
var base_tables = {
# extra bits and base tables for length codes
'length_bits': make_pool_byte_array(30),
'length_base': make_pool_int_array(30),
# extra bits and base tables for distance codes
'dist_bits': make_pool_byte_array(30),
'dist_base': make_pool_int_array(30)
}
var clcidx = PoolByteArray([
16, 17, 18, 0, 8, 7, 9, 6,
10, 5, 11, 4, 12, 3, 13, 2,
14, 1, 15])
# -----------------------
# -- utility functions --
# -----------------------
# build extra bits and base tables
# bits: PoolByteArray
# base: PoolIntArray
# delta: int
# first: int
func tinf_build_bits_base(target, delta, first):
# var i = 0
var sum = first
for i in range(0, delta):
base_tables[target + '_bits'][i] = 0
for i in range(0, 30 - delta):
base_tables[target + '_bits'][i + delta] = i / delta
for i in range(0, 30):
base_tables[target + '_base'][i] = sum
sum += 1 << base_tables[target + '_bits'][i]
# build the fixed huffman trees
# lt: TINF_TREE
# rt: TINF_TREE
func tinf_build_fixed_trees(lt, dt):
# var i = 0
for i in range(0, 7):
lt['table'][i] = 0
lt['table'][7] = 24
lt['table'][8] = 152
lt['table'][9] = 112
for i in range(0, 24):
lt['trans'][i] = 256 + i
for i in range(0, 144):
lt['trans'][24 + i] = i
for i in range(0, 8):
lt['trans'][24 + 144 + i] = 280 + i
for i in range(0, 112):
lt['trans'][24 + 144 + 8 + i] = 144 + i
for i in range(0, 5):
dt['table'][i] = 0
dt['table'][5] = 32
for i in range(0, 32):
dt['trans'][i] = i
# given an array of code lengths, build a tree
# t: TINF_TREE
# lengths: PoolByteArray
# num: int
func tinf_build_tree(t, lengths, num):
var offs = make_pool_int_array(16)
# var i = 0
var sum = 0
# clear code length count table
for i in range(0,16):
t['table'][i] = 0
# scan symbol lengths, and sum code length counts
for i in range(0, num):
t['table'][lengths[i]] += 1
t['table'][0] = 0
for i in range(0,16):
offs[i] = sum
sum += t['table'][i]
for i in range(0, num):
if lengths[i]:
t['trans'][offs[lengths[i]]] = i
offs[lengths[i]] += 1
# ----------------------
# -- decode functions --
# ----------------------
# get one bit from source stream
# d: TINF_DATA
# returns: int
func tinf_getbit(d):
var bit = 0
d['bitcount'] -= 1
if !(d['bitcount'] + 1) :
d['tag'] = d['source'][d['sourcePtr']]
d['sourcePtr'] += 1
d['bitcount'] = 7
bit = d['tag'] & 0x01
d['tag'] >>= 1
return bit
# read a num bit value from a stream and add base
# d: TINF_DATA
# num: int
# base: int
func tinf_read_bits(d, num, base):
var val = 0
if num:
var limit = 1 << num
var mask = 1
while mask < limit:
if tinf_getbit(d):
val += mask
mask *= 2
return val + base
# given a data stream and a tree, decode a symbol
# d: TINF_DATA
# t: TINF_TREE
func tinf_decode_symbol(d, t):
var sum = 0
var cur = 0
var length = 0
while true:
cur = 2 * cur + tinf_getbit(d)
length += 1
sum += t['table'][length]
cur -= t['table'][length]
if cur < 0:
break
return t['trans'][sum + cur]
# given a data stream, decode dynamic trees from it
# d: TINF_DATA
# lt: TINF_TREE
# dt: TINF_TREE
func tinf_decode_trees(d, lt, dt):
var code_tree = TINF_TREE.duplicate()
var lengths = make_pool_byte_array(288 + 32)
var hlit = 0
var hdist = 0
var hclen = 0
# var i = 0
var num = 0
var length = 0
# get 5 bits HLIT (257-286)
hlit = tinf_read_bits(d, 5, 257)
# get 5 bits HDIST (1-32)
hdist = tinf_read_bits(d, 5, 1)
# get 4 bits HCLEN (4-19)
hclen = tinf_read_bits(d, 4, 4)
for i in range(0, 19):
lengths[i] = 0
for i in range(0, hclen):
var clen = tinf_read_bits(d, 3, 0)
lengths[clcidx[i]] = clen
# build code length tree
tinf_build_tree(code_tree, lengths, 19)
while num < hlit + hdist:
var sym = tinf_decode_symbol(d, code_tree)
match sym:
16:
var prev = lengths[num - 1]
length = tinf_read_bits(d, 2, 3)
while length != 0:
lengths[num] = prev
num += 1
length -= 1
17:
length = tinf_read_bits(d, 3, 3)
while length != 0:
lengths[num] = 0
num += 1
length -= 1
18:
length = tinf_read_bits(d, 7, 11)
while length != 0:
lengths[num] = 0
num += 1
length -= 1
_:
lengths[num] = sym
num += 1
# build dynamic trees
tinf_build_tree(lt, lengths, hlit)
tinf_build_tree(dt, lengths.subarray(hlit, lengths.size() - 1), hdist)
# -----------------------------
# -- block inflate functions --
# -----------------------------
# given a stream and two trees, inflate a block of data
# d: TINF_DATA
# lt: TINF_TREE
# dt: TINF_TREE
func tinf_inflate_block_data(d, lt, dt):
var start = d['destPtr']
while true:
var sym = tinf_decode_symbol(d, lt)
if sym == 256:
d['destLen'] += d['destPtr'] - start
return TINF_OK
if sym < 256:
d['dest'][d['destPtr']] = sym
d['destPtr'] += 1
else:
var length = 0
var dist = 0
var offs = 0
# var i = 0
var ptr = d['destPtr']
sym -= 257
length = tinf_read_bits(d, base_tables['length_bits'][sym], base_tables['length_base'][sym])
dist = tinf_decode_symbol(d, dt)
# possibly get more bits from distance code
offs = tinf_read_bits(d, base_tables['dist_bits'][dist], base_tables['dist_base'][dist])
for i in range(0, length):
d['dest'][ptr + i] = d['dest'][ptr + (i - offs)]
d['destPtr'] += length
# inflate an uncompressed block of data */
# d: TINF_DATA
func tinf_inflate_uncompressed_block(d):
var length = 0
var invlength = 0
var i = 0
# get length
length = d['source'][d['sourcePtr'] + 1]
length = 256 * length + d['source'][d['sourcePtr']]
# get one's complement of length
invlength = d['source'][d['sourcePtr'] + 3]
invlength = 256 * invlength + d['source'][d['sourcePtr'] + 2]
if length != ~invlength & 0x0000FFFF:
return TINF_DATA_ERROR
d['sourcePtr'] += 4
i = length
while i:
d['dest'][d['destPtr']] = d['source'][d['sourcePtr']]
d['destPtr'] += 1
d['sourcePtr'] += 1
i -= 1
d['bitcount'] = 0
d['destLen'] += length
return TINF_OK
# inflate a block of data compressed with fixed huffman trees
# d: TINF_DATA
# returns: int
func tinf_inflate_fixed_block(d):
# decode block using fixed trees
return tinf_inflate_block_data(d, sltree, sdtree)
# inflate a block of data compressed with dynamic huffman trees
# d: TINF_DATA
# returns: int
func tinf_inflate_dynamic_block(d):
# decode trees from stream
tinf_decode_trees(d, d['ltree'], d['dtree'])
# decode block using decoded trees
return tinf_inflate_block_data(d, d['ltree'], d['dtree'])
# ----------------------
# -- public functions --
# ----------------------
func _init():
# build fixed huffman trees
tinf_build_fixed_trees(sltree, sdtree)
# build extra bits and base tables
tinf_build_bits_base('length', 4, 3)
tinf_build_bits_base('dist', 2, 1)
# fix a special case
base_tables['length_bits'][28] = 0
base_tables['length_base'][28] = 258
# inflate stream from source to dest
func tinf_uncompress(destLen, source):
var d = TINF_DATA.duplicate()
var dest = make_pool_byte_array(destLen)
var sourceSize = source.size()
d['source'] = source
d['dest'] = dest
destLen = 0
while true:
var btype = 0
var res = 0
# Skip final block flag
tinf_getbit(d)
# read block type (2 bits)
btype = tinf_read_bits(d, 2, 0)
match btype:
0:
# decompress uncompressed block
res = tinf_inflate_uncompressed_block(d)
1:
# decompress block with fixed huffman trees
res = tinf_inflate_fixed_block(d)
2:
# decompress block with dynamic huffman trees
res = tinf_inflate_dynamic_block(d)
_:
return false
if res != TINF_OK:
return false
# When we have consumed the entire source, we're done
if d['sourcePtr'] >= sourceSize:
break
return d['dest']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment