harjitmoe/nbtpickle.py

## nbtpickle.py
#!/usr/bin/env python3
# -*- mode: python; coding: utf-8 -*-

""" Use the pickle/json/marshal basic API with NBT data. """

# Authored by HarJIT in 2019. This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at
# https://mozilla.org/MPL/2.0/.

from ctypes import c_byte, c_int16, c_int32, c_int64, c_float, c_double
from collections import OrderedDict
import struct, io

# NBT tag type identifiers; the numeric values are the on-disk tag type bytes.
(
    END,       # 0: terminates a compound
    BYTE,      # 1: 8-bit integer
    SHORT,     # 2: 16-bit integer
    INT,       # 3: 32-bit integer
    LONG,      # 4: 64-bit integer
    FLOAT,     # 5: single-precision float
    DOUBLE,    # 6: double-precision float
    BYTES,     # 7: byte array
    STRING,    # 8: UTF-8 string
    LIST,      # 9: homogeneous list
    COMPOUND,  # 10: named-tag mapping
    INTS,      # 11: array of 32-bit integers
    LONGS,     # 12: array of 64-bit integers
) = range(13)

# Note that nbttypewhenempty is only consulted as a last resort, i.e. for an empty array.
# The idea is that empty arrays round-trip when they sit in a part of the structure that the
# script loading/editing/dumping the data does not touch.
# The original type itself might be a placeholder (END or BYTE) in the case of an empty list, so
# the content takes priority whenever there is any.
# Note also that nbttypewhenempty does not mean the same thing for the two classes: for NBTList
# it is the INNER (element) type, since a list's outer type is always LIST, whereas for NBTTuple
# it is the OUTER (array) type, since an empty tuple gives no hint as to which array tag it was.
class NBTList(list):
    """A list carrying the NBT element type to emit when it has no elements.

    The ``nbttypewhenempty`` attribute stores the INNER (element) tag type. Any
    further positional arguments are handed to the ``list`` constructor.
    """

    def __init__(self, typ, *args):
        super(NBTList, self).__init__(*args)
        self.nbttypewhenempty = typ
class NBTTuple(tuple):
    """A tuple carrying the NBT array type to emit when it has no elements.

    The ``nbttypewhenempty`` attribute stores the OUTER (array) tag type. Any
    further positional arguments are handed to the ``tuple`` constructor.
    """

    # A tuple is fully built by __new__, so the extra ``typ`` argument has to be
    # consumed here; an __init__ override would never get the chance to strip it.
    def __new__(cls, typ, *args):
        instance = super(NBTTuple, cls).__new__(cls, *args)
        instance.nbttypewhenempty = typ
        return instance

# The idea's to be useful for serialising native Python stuff as well as the Minecraft stuff.
def _deduce_type(obj):
    """ Return the NBT tag type constant with which obj should be serialised.

    ctypes wrappers map to their exact width. A native int maps to the
    narrowest integer tag able to hold it. A native float maps to DOUBLE.
    A tuple maps to an array tag (BYTES, INTS or LONGS) when its elements
    allow it and to LIST otherwise.

    Raises TypeError for unsupported types and for an empty tuple which is
    not an NBTTuple. Raises ValueError for integers needing more than 64
    bits and for tuples mixing element types.
    """
    if isinstance(obj, c_byte):
        return BYTE
    if isinstance(obj, c_int16):
        return SHORT
    if isinstance(obj, c_int32):
        return INT
    if isinstance(obj, c_int64):
        return LONG
    if isinstance(obj, int):
        # If passed as a Python type, assume type is insignificant and use the shortest
        # possible representation. ~obj maps a negative value onto the non-negative value
        # needing the same number of two's-complement bits.
        magnitude = obj if obj >= 0 else ~obj
        if not (magnitude >> 7):
            return BYTE
        if not (magnitude >> 15):
            return SHORT
        if not (magnitude >> 31):
            return INT
        if not (magnitude >> 63):
            return LONG
        raise ValueError("integer too large to be stored in NBT ({!r})".format(obj))
    if isinstance(obj, c_float):
        return FLOAT
    if isinstance(obj, (c_double, float)):
        # Python's native "float" is double-precision. Float widths are a matter of
        # precision as much as range, so no narrowing is attempted.
        return DOUBLE
    if isinstance(obj, (bytes, bytearray)):
        return BYTES
    if isinstance(obj, str):
        return STRING
    if isinstance(obj, list):
        return LIST
    if isinstance(obj, dict):
        return COMPOUND
    if not isinstance(obj, tuple):
        raise TypeError("unsupported type {!r}".format(type(obj)))
    if not obj:
        if isinstance(obj, NBTTuple):
            return obj.nbttypewhenempty
        raise TypeError("cannot automatically represent untyped empty tuple")
    #
    # For a tuple solely of native Python integers:
    if all(isinstance(i, int) for i in obj):
        fits32 = True
        for i in obj:
            magnitude = i if i >= 0 else ~i
            if magnitude >> 63:
                # Reject here, as for scalars, rather than failing later inside struct.pack.
                raise ValueError("integer too large to be stored in NBT ({!r})".format(i))
            if magnitude >> 31:
                fits32 = False
        # Don't use BYTES, since that won't round-trip to a tuple.
        return INTS if fits32 else LONGS
    #
    # For anything else:
    inner = _deduce_type(obj[0])
    for i in obj:
        if isinstance(i, int):
            raise ValueError("mixed native and fixed-width integers in a tuple")
        if _deduce_type(i) != inner:
            raise ValueError("mixed-type tuples forbidden in NBT")
    if inner == BYTE: # We've excluded native integers, so this is necessarily a c_byte.
        return BYTES
    if inner == INT:
        return INTS
    if inner == LONG:
        return LONGS
    return LIST

def _deduce_list_type(lst):
    """ Return the NBT tag type shared by every element of the sequence lst.

    An empty NBTList yields its nbttypewhenempty; any other empty sequence
    yields BYTE. A sequence of native ints yields the narrowest integer tag
    which can hold all of them. Otherwise every element must deduce to the
    same tag type.

    Raises ValueError when element types are mixed (including native ints
    mixed with fixed-width ctypes integers, in either order) or when a native
    int needs more than 64 bits. Errors from _deduce_type propagate.
    """
    if not lst:
        if isinstance(lst, NBTList):
            return lst.nbttypewhenempty
        # Empty list without a set type.
        return BYTE # alternatively END, but that apparently breaks some older parsers.
    fixed_width = (c_byte, c_int16, c_int32, c_int64)
    native = object() # marker: "all elements so far are native Python ints"
    status = None
    for item in lst:
        is_native = isinstance(item, int)
        if status is None:
            status = native if is_native else _deduce_type(item)
        elif status is native:
            if is_native:
                continue
            if isinstance(item, fixed_width):
                raise ValueError("mixed native and fixed-width integers in a list")
            _deduce_type(item) # let unsupported values raise their own error first
            raise ValueError("mixed-type lists forbidden in NBT")
        elif is_native:
            # Checked before comparing deduced types: a native int which happens to fit the
            # same width as the ctypes elements must not slip through, otherwise the result
            # would depend on element order.
            if status in (BYTE, SHORT, INT, LONG):
                raise ValueError("mixed native and fixed-width integers in a list")
            raise ValueError("mixed-type lists forbidden in NBT")
        elif _deduce_type(item) != status:
            raise ValueError("mixed-type lists forbidden in NBT")
    if status is not native:
        return status
    # All native ints: widen to fit the largest magnitude. The comparisons with max() rely on
    # BYTE < SHORT < INT < LONG holding numerically for the tag constants.
    widest = BYTE
    for i in lst:
        magnitude = i if i >= 0 else ~i
        if magnitude >> 63:
            raise ValueError("integer too large to be stored in NBT ({!r})".format(i))
        if magnitude >> 31:
            widest = LONG
        elif magnitude >> 15:
            widest = max(widest, INT)
        elif magnitude >> 7:
            widest = max(widest, SHORT)
    return widest
def _dump(item, file, typ, endian):
    """ Write the payload of item to file, encoded as NBT tag type typ.

    Only the payload is written; the tag type byte and name are the caller's
    job (compound members are the exception: their headers are written here).

    item is a ctypes wrapper, native Python value or container. file is a
    binary file-like object with a write method. endian is a struct
    byte-order prefix.

    Raises ValueError for non-string compound keys and RuntimeError for an
    unknown typ; struct.error propagates for values which do not fit.
    """
    # GENERAL NOTE: not using bytes() on the ctypes types besides c_byte means I don't have to care
    # about the host's native formats. NBT is big-endian in MCJE and little-endian in MCBE.
    if typ == BYTE:
        if isinstance(item, c_byte):
            file.write(bytes(item))
        else:
            # bytes() requires an unsigned representation, hence the &0xFF.
            file.write(bytes([item & 0xFF]))
    elif typ == SHORT:
        value = item.value if isinstance(item, c_int16) else item
        file.write(struct.pack(endian + "h", value))
    elif typ == INT:
        value = item.value if isinstance(item, c_int32) else item
        file.write(struct.pack(endian + "l", value))
    elif typ == LONG:
        value = item.value if isinstance(item, c_int64) else item
        file.write(struct.pack(endian + "q", value))
    elif typ == FLOAT:
        file.write(struct.pack(endian + "f", item.value))
    elif typ == DOUBLE:
        value = item.value if isinstance(item, c_double) else item
        file.write(struct.pack(endian + "d", value))
    elif typ == BYTES:
        file.write(struct.pack(endian + "l", len(item)))
        # Might be passed to us as bytes, bytearray or tuple
        if item and isinstance(item[0], c_byte):
            # Accepted for consistency with such representations for INTS and LONGS but
            # not recommended; loads() will return a bytes object rather than this format.
            file.write(b"".join(bytes(i) for i in item))
        else:
            file.write(bytes(item))
    elif typ == STRING:
        encoded = item.encode("utf-8")
        # The length prefix is unsigned, exactly like the compound key lengths below; a signed
        # "h" would reject strings of 32768 to 65535 bytes.
        file.write(struct.pack(endian + "H", len(encoded)))
        file.write(encoded)
    elif typ == LIST:
        inner = _deduce_list_type(item)
        file.write(struct.pack(endian + "B", inner))
        file.write(struct.pack(endian + "l", len(item)))
        for element in item:
            _dump(element, file, inner, endian)
    elif typ == COMPOUND:
        for (name, member) in item.items():
            member_typ = _deduce_type(member)
            if not isinstance(name, str):
                raise ValueError("keys must be strings")
            bname = name.encode("utf-8")
            file.write(struct.pack(endian + "B", member_typ))
            file.write(struct.pack(endian + "H", len(bname)))
            file.write(bname)
            _dump(member, file, member_typ, endian)
        file.write(struct.pack(endian + "B", END))
    elif typ == INTS:
        file.write(struct.pack(endian + "l", len(item)))
        if item:
            # mixed ctypes and native int would already have been raised as an error by now.
            values = [i.value for i in item] if isinstance(item[0], c_int32) else item
            file.write(struct.pack(endian + ("l" * len(item)), *values))
    elif typ == LONGS:
        file.write(struct.pack(endian + "l", len(item)))
        if item:
            # mixed ctypes and native int would already have been raised as an error by now.
            values = [i.value for i in item] if isinstance(item[0], c_int64) else item
            file.write(struct.pack(endian + ("q" * len(item)), *values))
    else:
        raise RuntimeError("unexpected `typ` value; this should not happen ({!r})".format(typ))

def dump(obj, file, *, name=None, endian=">"):
    """ Write an object (obj) as uncompressed NBT to a file (file).

    The optional name argument gives a name to the root tag.

    The endian argument takes the same format as the struct module. It
    should be set to ">" for MCJE data (the default) or to "<" for
    MCBE (MCPE) data.

    Raises ValueError if name is neither None nor a string; errors from
    type deduction propagate. The root tag header is fully validated before
    anything is written, so a bad name or root object leaves the file
    untouched.
    """
    typ = _deduce_type(obj) # deduced once and reused for both header and payload
    if name is None:
        bname = b""
    elif isinstance(name, str):
        bname = name.encode("utf-8")
    else:
        raise ValueError("keys must be strings")
    header = struct.pack(endian + "B", typ) + struct.pack(endian + "H", len(bname)) + bname
    file.write(header)
    _dump(obj, file, typ, endian)

def dumps(obj, *, name=None, endian=">"):
    """ Serialise an object (obj) and return the uncompressed NBT bytes.

    The optional name argument gives a name to the root tag.

    The endian argument takes the same format as the struct module:
    ">" (the default) for MCJE data, "<" for MCBE (MCPE) data.
    """
    with io.BytesIO() as buffer:
        dump(obj, buffer, name=name, endian=endian)
        return buffer.getvalue()

def _load(file, typ, preserve, endian):
    """ Read one payload of NBT tag type typ from file and return it.

    file is a binary file-like object with a read method. With preserve
    true, numbers come back as ctypes wrappers, lists as NBTList, compounds
    as OrderedDict and integer arrays as NBTTuple of ctypes values; with
    preserve false, native Python values are returned. endian is a struct
    byte-order prefix.

    Raises ValueError for an unknown tag type or a duplicate compound key;
    struct.error propagates when the data is truncated.
    """
    if typ == BYTE:
        # NBT bytes are signed. Unpacking with "b" keeps preserve=False consistent with what
        # _dump writes for negative values (indexing the raw byte would give 0..255).
        value, = struct.unpack(endian + "b", file.read(1))
        return c_byte(value) if preserve else value
    if typ == SHORT:
        value, = struct.unpack(endian + "h", file.read(2))
        return c_int16(value) if preserve else value
    if typ == INT:
        value, = struct.unpack(endian + "l", file.read(4))
        return c_int32(value) if preserve else value
    if typ == LONG:
        value, = struct.unpack(endian + "q", file.read(8))
        return c_int64(value) if preserve else value
    if typ == FLOAT:
        value, = struct.unpack(endian + "f", file.read(4))
        return c_float(value) if preserve else value
    if typ == DOUBLE:
        value, = struct.unpack(endian + "d", file.read(8))
        return c_double(value) if preserve else value
    if typ == BYTES:
        length, = struct.unpack(endian + "l", file.read(4))
        return file.read(length)
    if typ == STRING:
        # Unsigned length: a signed read would turn lengths of 32768 and above negative, and
        # read() with a negative size consumes the whole rest of the stream.
        length, = struct.unpack(endian + "H", file.read(2))
        return file.read(length).decode("utf-8", errors="replace")
    if typ == LIST:
        inner, = struct.unpack(endian + "B", file.read(1))
        length, = struct.unpack(endian + "l", file.read(4))
        items = [_load(file, inner, preserve, endian) for _ in range(length)]
        return NBTList(inner, items) if preserve else items
    if typ == COMPOUND:
        # OrderedDict is guaranteed to be ordered; this has only been guaranteed for dict since
        # Python 3.7. Hence if we're preserving info, we want to guarantee preservation of order.
        members = OrderedDict() if preserve else {}
        while True:
            # Not _load: each member carries its own type byte and name.
            name, value = load(file, preserve=preserve, with_name=True, endian=endian)
            if value is None: # END tag
                break
            if name in members:
                raise ValueError("duplicate key: {!r}".format(name))
            members[name] = value
        return members
    if typ == INTS:
        length, = struct.unpack(endian + "l", file.read(4))
        values = struct.unpack(endian + ("l" * length), file.read(4 * length)) if length else ()
        return NBTTuple(typ, (c_int32(i) for i in values)) if preserve else values
    if typ == LONGS:
        length, = struct.unpack(endian + "l", file.read(4))
        values = struct.unpack(endian + ("q" * length), file.read(8 * length)) if length else ()
        return NBTTuple(typ, (c_int64(i) for i in values)) if preserve else values
    raise ValueError("unsupported NBT field type {!r}".format(typ))

def load(file, *, preserve=True, with_name=False, endian=">"):
    """ Read one uncompressed NBT tag from a file (file), return its content.

    With preserve=True (the default) the original NBT types are kept as
    faithfully as possible (in particular, numbers come back as ctypes
    wrappers). With preserve=False natural Python values are returned,
    which may not dump back to the same tag types.

    With with_name=False (the default) only the object is returned, and a
    ValueError is raised if the root tag has a non-empty name. With
    with_name=True a (name, object) tuple is returned. For an END tag the
    result is None, or (None, None) with with_name=True.

    The endian argument takes the same format as the struct module:
    ">" (the default) for MCJE data, "<" for MCBE (MCPE) data.
    """
    tag, = struct.unpack(endian + "B", file.read(1))
    if tag == END:
        if with_name:
            return (None, None)
        return None
    name_size, = struct.unpack(endian + "H", file.read(2))
    label = file.read(name_size).decode("utf-8", errors="replace")
    if label and not with_name:
        raise ValueError("named root tag without with_name=True")
    payload = _load(file, tag, preserve, endian)
    if with_name:
        return label, payload
    return payload

def loads(dat, *, preserve=True, with_name=False, endian=">"):
    """ Parse uncompressed NBT byte data (dat) and return the object.

    With preserve=True (the default) the original NBT types are kept as
    faithfully as possible (in particular, numbers come back as ctypes
    wrappers). With preserve=False natural Python values are returned,
    which may not dump back to the same tag types.

    With with_name=False (the default) only the object is returned, and an
    error is raised if the root tag is named. With with_name=True a tuple
    of the name (even if an empty string) and the object is returned. Both
    of these are None in event of an END tag.

    The endian argument takes the same format as the struct module:
    ">" (the default) for MCJE data, "<" for MCBE (MCPE) data.
    """
    stream = io.BytesIO(dat)
    return load(stream, preserve=preserve, with_name=with_name, endian=endian)
	#!/usr/bin/env python3
	# -*- mode: python; coding: utf-8 -*-

	""" Use the pickle/json/marshal basic API with NBT data. """

	# Authored by HarJIT in 2019. This Source Code Form is subject to the terms of the Mozilla Public
	# License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at
	# https://mozilla.org/MPL/2.0/.

	from ctypes import c_byte, c_int16, c_int32, c_int64, c_float, c_double
	from collections import OrderedDict
	import struct, io

	# NBT tag type identifiers; the numeric values are the on-disk tag type bytes.
	(
	    END,       # 0: terminates a compound
	    BYTE,      # 1: 8-bit integer
	    SHORT,     # 2: 16-bit integer
	    INT,       # 3: 32-bit integer
	    LONG,      # 4: 64-bit integer
	    FLOAT,     # 5: single-precision float
	    DOUBLE,    # 6: double-precision float
	    BYTES,     # 7: byte array
	    STRING,    # 8: UTF-8 string
	    LIST,      # 9: homogeneous list
	    COMPOUND,  # 10: named-tag mapping
	    INTS,      # 11: array of 32-bit integers
	    LONGS,     # 12: array of 64-bit integers
	) = range(13)

	# Note that nbttypewhenempty is only consulted as a last resort, i.e. for an empty array.
	# The idea is that empty arrays round-trip when they sit in a part of the structure that the
	# script loading/editing/dumping the data does not touch.
	# The original type itself might be a placeholder (END or BYTE) in the case of an empty list, so
	# the content takes priority whenever there is any.
	# Note also that nbttypewhenempty does not mean the same thing for the two classes: for NBTList
	# it is the INNER (element) type, since a list's outer type is always LIST, whereas for NBTTuple
	# it is the OUTER (array) type, since an empty tuple gives no hint as to which array tag it was.
	class NBTList(list):
	    """A list carrying the NBT element type to emit when it has no elements.

	    The ``nbttypewhenempty`` attribute stores the INNER (element) tag type. Any
	    further positional arguments are handed to the ``list`` constructor.
	    """

	    def __init__(self, typ, *args):
	        self.nbttypewhenempty = typ
	        super().__init__(*args)
	class NBTTuple(tuple):
	    """A tuple carrying the NBT array type to emit when it has no elements.

	    The ``nbttypewhenempty`` attribute stores the OUTER (array) tag type. Any
	    further positional arguments are handed to the ``tuple`` constructor.
	    """

	    # The tuple class itself overrides __new__, so the typ argument has to be
	    # consumed here rather than in __init__.
	    def __new__(cls, typ, *args):
	        self = super().__new__(cls, *args)
	        self.nbttypewhenempty = typ
	        return self

	# The idea's to be useful for serialising native Python stuff as well as the Minecraft stuff.
	def _deduce_type(obj):
	    """ Return the NBT tag type constant with which obj should be serialised.

	    Raises TypeError for unsupported types and for an empty tuple which is
	    not an NBTTuple. Raises ValueError for integers needing more than 64
	    bits and for tuples mixing element types.
	    """
	    if isinstance(obj, c_byte):
	        return BYTE
	    if isinstance(obj, c_int16):
	        return SHORT
	    if isinstance(obj, c_int32):
	        return INT
	    if isinstance(obj, c_int64):
	        return LONG
	    if isinstance(obj, int):
	        # If passed as a Python type, assume type is insignificant and use the shortest
	        # possible representation for that integer.
	        magnitude = obj if obj >= 0 else ~obj
	        if not (magnitude >> 7):
	            return BYTE
	        if not (magnitude >> 15):
	            return SHORT
	        if not (magnitude >> 31):
	            return INT
	        if not (magnitude >> 63):
	            return LONG
	        raise ValueError("integer too large to be stored in NBT ({!r})".format(obj))
	    if isinstance(obj, c_float):
	        return FLOAT
	    if isinstance(obj, (c_double, float)):
	        # Python's native "float" is actually double-precision.
	        return DOUBLE
	    if isinstance(obj, (bytes, bytearray)):
	        return BYTES
	    if isinstance(obj, str):
	        return STRING
	    if isinstance(obj, list):
	        return LIST
	    if isinstance(obj, dict):
	        return COMPOUND
	    if not isinstance(obj, tuple):
	        raise TypeError("unsupported type {!r}".format(type(obj)))
	    if not obj:
	        if isinstance(obj, NBTTuple):
	            return obj.nbttypewhenempty
	        raise TypeError("cannot automatically represent untyped empty tuple")
	    #
	    # For a tuple solely of native Python integers:
	    if all(isinstance(i, int) for i in obj):
	        fits32 = True
	        for i in obj:
	            magnitude = i if i >= 0 else ~i
	            if magnitude >> 63:
	                # Reject here, as for scalars, rather than failing later inside struct.pack.
	                raise ValueError("integer too large to be stored in NBT ({!r})".format(i))
	            if magnitude >> 31:
	                fits32 = False
	        # Don't use BYTES, since that won't round-trip to a tuple.
	        return INTS if fits32 else LONGS
	    #
	    # For anything else:
	    inner = _deduce_type(obj[0])
	    for i in obj:
	        if isinstance(i, int):
	            raise ValueError("mixed native and fixed-width integers in a tuple")
	        if _deduce_type(i) != inner:
	            raise ValueError("mixed-type tuples forbidden in NBT")
	    if inner == BYTE: # We've excluded native integers, so this is necessarily a c_byte.
	        return BYTES
	    if inner == INT:
	        return INTS
	    if inner == LONG:
	        return LONGS
	    return LIST

	def _deduce_list_type(lst):
	    """ Return the NBT tag type shared by every element of the sequence lst.

	    An empty NBTList yields its nbttypewhenempty; any other empty sequence
	    yields BYTE. Native ints yield the narrowest integer tag holding all of
	    them. Raises ValueError for mixed element types (in either order) and
	    for native ints needing more than 64 bits.
	    """
	    if not lst:
	        if isinstance(lst, NBTList):
	            return lst.nbttypewhenempty
	        # Empty list without a set type.
	        return BYTE # alternatively END, but that apparently breaks some older parsers.
	    fixed_width = (c_byte, c_int16, c_int32, c_int64)
	    native = object() # marker: "all elements so far are native Python ints"
	    status = None
	    for item in lst:
	        is_native = isinstance(item, int)
	        if status is None:
	            status = native if is_native else _deduce_type(item)
	        elif status is native:
	            if is_native:
	                continue
	            if isinstance(item, fixed_width):
	                raise ValueError("mixed native and fixed-width integers in a list")
	            _deduce_type(item) # let unsupported values raise their own error first
	            raise ValueError("mixed-type lists forbidden in NBT")
	        elif is_native:
	            # Checked before comparing deduced types so the outcome does not depend on
	            # element order.
	            if status in (BYTE, SHORT, INT, LONG):
	                raise ValueError("mixed native and fixed-width integers in a list")
	            raise ValueError("mixed-type lists forbidden in NBT")
	        elif _deduce_type(item) != status:
	            raise ValueError("mixed-type lists forbidden in NBT")
	    if status is not native:
	        return status
	    # The comparisons with max() rely on BYTE < SHORT < INT < LONG holding numerically.
	    widest = BYTE
	    for i in lst:
	        magnitude = i if i >= 0 else ~i
	        if magnitude >> 63:
	            raise ValueError("integer too large to be stored in NBT ({!r})".format(i))
	        if magnitude >> 31:
	            widest = LONG
	        elif magnitude >> 15:
	            widest = max(widest, INT)
	        elif magnitude >> 7:
	            widest = max(widest, SHORT)
	    return widest

	def _dump(item, file, typ, endian):
	    """ Write the payload of item to file, encoded as NBT tag type typ.

	    Only the payload is written; the tag header is the caller's job, except
	    for compound members whose headers are written here. Raises ValueError
	    for non-string compound keys and RuntimeError for an unknown typ.
	    """
	    # GENERAL NOTE: not using bytes() on the ctypes types besides c_byte means I don't have to
	    # care about the host's native formats. NBT is big-endian in MCJE, little-endian in MCBE.
	    if typ == BYTE:
	        if isinstance(item, c_byte):
	            file.write(bytes(item))
	        else:
	            # bytes() requires an unsigned representation, hence the &0xFF.
	            file.write(bytes([item & 0xFF]))
	    elif typ == SHORT:
	        value = item.value if isinstance(item, c_int16) else item
	        file.write(struct.pack(endian + "h", value))
	    elif typ == INT:
	        value = item.value if isinstance(item, c_int32) else item
	        file.write(struct.pack(endian + "l", value))
	    elif typ == LONG:
	        value = item.value if isinstance(item, c_int64) else item
	        file.write(struct.pack(endian + "q", value))
	    elif typ == FLOAT:
	        file.write(struct.pack(endian + "f", item.value))
	    elif typ == DOUBLE:
	        value = item.value if isinstance(item, c_double) else item
	        file.write(struct.pack(endian + "d", value))
	    elif typ == BYTES:
	        file.write(struct.pack(endian + "l", len(item)))
	        # Might be passed to us as bytes, bytearray or tuple
	        if item and isinstance(item[0], c_byte):
	            # Accepted for consistency with such representations for INTS and LONGS but
	            # not recommended; loads() will return a bytes object rather than this format.
	            file.write(b"".join(bytes(i) for i in item))
	        else:
	            file.write(bytes(item))
	    elif typ == STRING:
	        encoded = item.encode("utf-8")
	        # Unsigned length prefix, like the compound key lengths below.
	        file.write(struct.pack(endian + "H", len(encoded)))
	        file.write(encoded)
	    elif typ == LIST:
	        inner = _deduce_list_type(item)
	        file.write(struct.pack(endian + "B", inner))
	        file.write(struct.pack(endian + "l", len(item)))
	        for element in item:
	            _dump(element, file, inner, endian)
	    elif typ == COMPOUND:
	        for (name, member) in item.items():
	            member_typ = _deduce_type(member)
	            if not isinstance(name, str):
	                raise ValueError("keys must be strings")
	            bname = name.encode("utf-8")
	            file.write(struct.pack(endian + "B", member_typ))
	            file.write(struct.pack(endian + "H", len(bname)))
	            file.write(bname)
	            _dump(member, file, member_typ, endian)
	        file.write(struct.pack(endian + "B", END))
	    elif typ == INTS:
	        file.write(struct.pack(endian + "l", len(item)))
	        if item:
	            # mixed ctypes and native int would already have been raised as an error by now.
	            values = [i.value for i in item] if isinstance(item[0], c_int32) else item
	            file.write(struct.pack(endian + ("l" * len(item)), *values))
	    elif typ == LONGS:
	        file.write(struct.pack(endian + "l", len(item)))
	        if item:
	            # mixed ctypes and native int would already have been raised as an error by now.
	            values = [i.value for i in item] if isinstance(item[0], c_int64) else item
	            file.write(struct.pack(endian + ("q" * len(item)), *values))
	    else:
	        raise RuntimeError("unexpected `typ` value; this should not happen ({!r})".format(typ))

	def dump(obj, file, *, name=None, endian=">"):
	    """ Write an object (obj) as uncompressed NBT to a file (file).

	    The optional name argument gives a name to the root tag.

	    The endian argument takes the same format as the struct module. It
	    should be set to ">" for MCJE data (the default) or to "<" for
	    MCBE (MCPE) data.

	    Raises ValueError if name is neither None nor a string. The root tag
	    header is validated in full before anything is written.
	    """
	    typ = _deduce_type(obj) # deduced once and reused for both header and payload
	    if name is None:
	        bname = b""
	    elif isinstance(name, str):
	        bname = name.encode("utf-8")
	    else:
	        raise ValueError("keys must be strings")
	    header = struct.pack(endian + "B", typ) + struct.pack(endian + "H", len(bname)) + bname
	    file.write(header)
	    _dump(obj, file, typ, endian)

	def dumps(obj, *, name=None, endian=">"):
	    """ Convert an object (obj) to uncompressed NBT byte data.

	    The optional name argument gives a name to the root tag.

	    The endian argument takes the same format as the struct module. It
	    should be set to ">" for MCJE data (the default) or to "<" for
	    MCBE (MCPE) data.
	    """
	    f = io.BytesIO()
	    dump(obj, f, name=name, endian=endian)
	    return f.getvalue()

	def _load(file, typ, preserve, endian):
	    """ Read one payload of NBT tag type typ from file and return it.

	    With preserve true, values come back as ctypes wrappers, NBTList,
	    OrderedDict and NBTTuple; otherwise as native Python values. Raises
	    ValueError for an unknown tag type or a duplicate compound key.
	    """
	    if typ == BYTE:
	        # NBT bytes are signed; "b" keeps preserve=False consistent with what _dump writes.
	        value, = struct.unpack(endian + "b", file.read(1))
	        return c_byte(value) if preserve else value
	    if typ == SHORT:
	        value, = struct.unpack(endian + "h", file.read(2))
	        return c_int16(value) if preserve else value
	    if typ == INT:
	        value, = struct.unpack(endian + "l", file.read(4))
	        return c_int32(value) if preserve else value
	    if typ == LONG:
	        value, = struct.unpack(endian + "q", file.read(8))
	        return c_int64(value) if preserve else value
	    if typ == FLOAT:
	        value, = struct.unpack(endian + "f", file.read(4))
	        return c_float(value) if preserve else value
	    if typ == DOUBLE:
	        value, = struct.unpack(endian + "d", file.read(8))
	        return c_double(value) if preserve else value
	    if typ == BYTES:
	        length, = struct.unpack(endian + "l", file.read(4))
	        return file.read(length)
	    if typ == STRING:
	        # Unsigned length: a negative size would make read() consume the rest of the stream.
	        length, = struct.unpack(endian + "H", file.read(2))
	        return file.read(length).decode("utf-8", errors="replace")
	    if typ == LIST:
	        inner, = struct.unpack(endian + "B", file.read(1))
	        length, = struct.unpack(endian + "l", file.read(4))
	        items = [_load(file, inner, preserve, endian) for _ in range(length)]
	        return NBTList(inner, items) if preserve else items
	    if typ == COMPOUND:
	        # OrderedDict guarantees order on every Python 3 version; plain dict only from 3.7.
	        members = OrderedDict() if preserve else {}
	        while True:
	            # Not _load: each member carries its own type byte and name.
	            name, value = load(file, preserve=preserve, with_name=True, endian=endian)
	            if value is None: # END tag
	                break
	            if name in members:
	                raise ValueError("duplicate key: {!r}".format(name))
	            members[name] = value
	        return members
	    if typ == INTS:
	        length, = struct.unpack(endian + "l", file.read(4))
	        values = struct.unpack(endian + ("l" * length), file.read(4 * length)) if length else ()
	        return NBTTuple(typ, (c_int32(i) for i in values)) if preserve else values
	    if typ == LONGS:
	        length, = struct.unpack(endian + "l", file.read(4))
	        values = struct.unpack(endian + ("q" * length), file.read(8 * length)) if length else ()
	        return NBTTuple(typ, (c_int64(i) for i in values)) if preserve else values
	    raise ValueError("unsupported NBT field type {!r}".format(typ))

	def load(file, *, preserve=True, with_name=False, endian=">"):
	    """ Read uncompressed NBT data from a file (file), return its content.

	    If preserve=True (the default), efforts will be made to preserve the
	    original NBT data as faithfully as possible (in particular, numerical
	    values will be represented as ctypes wrappers). If preserve=False,
	    values will be returned in their natural Python representations, and
	    may not retain the same types if dumped again.

	    If with_name=False (the default), the object will be returned; an
	    error will be thrown if the root tag is named. If with_name=True,
	    a tuple of the name and the object will be returned. Both of these
	    are None in event of an END tag.

	    The endian argument takes the same format as the struct module. It
	    should be set to ">" for MCJE data (the default) or to "<" for
	    MCBE (MCPE) data.
	    """
	    typ, = struct.unpack(endian + "B", file.read(1))
	    if typ == END:
	        return (None, None) if with_name else None
	    namelen, = struct.unpack(endian + "H", file.read(2))
	    name = file.read(namelen).decode("utf-8", errors="replace")
	    if with_name:
	        return name, _load(file, typ, preserve, endian)
	    elif name:
	        raise ValueError("named root tag without with_name=True")
	    else:
	        return _load(file, typ, preserve, endian)

	def loads(dat, *, preserve=True, with_name=False, endian=">"):
	    """ Load an object from uncompressed NBT data (dat).

	    If preserve=True (the default), efforts will be made to preserve the
	    original NBT data as faithfully as possible (in particular, numerical
	    values will be represented as ctypes wrappers). If preserve=False,
	    values will be returned in their natural Python representations, and
	    may not retain the same types if dumped again.

	    If with_name=False (the default), the object will be returned; an
	    error will be thrown if the root tag is named. If with_name=True,
	    a tuple of the name (even if an empty string) and the object will
	    be returned. Both of these are None in event of an END tag.

	    The endian argument takes the same format as the struct module. It
	    should be set to ">" for MCJE data (the default) or to "<" for
	    MCBE (MCPE) data.
	    """
	    return load(io.BytesIO(dat), preserve=preserve, with_name=with_name, endian=endian)