Skip to content

Instantly share code, notes, and snippets.

@harjitmoe
Last active November 1, 2019 16:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save harjitmoe/e695a3fd2622fdf3f67e72a871efab62 to your computer and use it in GitHub Desktop.
Save harjitmoe/e695a3fd2622fdf3f67e72a871efab62 to your computer and use it in GitHub Desktop.
Use the pickle/json/marshal basic API with NBT data.
#!/usr/bin/env python3
# -*- mode: python; coding: utf-8 -*-
""" Use the pickle/json/marshal basic API with NBT data. """
# Authored by HarJIT in 2019. This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at
# https://mozilla.org/MPL/2.0/.
from ctypes import c_byte, c_int16, c_int32, c_int64, c_float, c_double
from collections import OrderedDict
import struct, io
# NBT tag type IDs. The numeric value of each name is the byte written to /
# read from the stream to identify the type of the payload that follows.
(
    END,       # 0: written after the last entry of a compound
    BYTE,      # 1
    SHORT,     # 2
    INT,       # 3
    LONG,      # 4
    FLOAT,     # 5
    DOUBLE,    # 6
    BYTES,     # 7: byte array
    STRING,    # 8
    LIST,      # 9
    COMPOUND,  # 10
    INTS,      # 11: array of 32-bit integers
    LONGS,     # 12: array of 64-bit integers
) = range(13)
# Note that the nbttypewhenempty is only referenced as a last resort, i.e. for an empty array.
# The idea is that empty arrays round-trip when they belong to a part of the structure which
# the script loading/editing/dumping it doesn't do anything to.
# The original type itself might be a placeholder (END or BYTE) in the case of an empty list. So
# the content takes priority.
# Note also that the nbttypewhenempty does not mean the same thing between them: for NBTList it's
# the INNER type and for NBTTuple it's the OUTER type. For fairly obvious reasons.
class NBTList(list):
    """A list that remembers which NBT element type to declare when it is empty.

    The first constructor argument is the element type ID; any remaining
    positional arguments are forwarded unchanged to ``list``.
    """

    def __init__(self, typ, *args):
        # Populate the list first, then tag it; the two steps are independent.
        list.__init__(self, *args)
        self.nbttypewhenempty = typ
class NBTTuple(tuple):
    """A tuple that remembers which NBT array type to use when it is empty.

    The first constructor argument is the array type ID; any remaining
    positional arguments are forwarded unchanged to ``tuple``.
    """

    # tuple builds its contents in __new__, so the extra leading argument has
    # to be intercepted here; doing it in __init__ would not work.
    def __new__(cls, typ, *args):
        instance = tuple.__new__(cls, *args)
        instance.nbttypewhenempty = typ
        return instance
# The idea's to be useful for serialising native Python stuff as well as the Minecraft stuff.
def _deduce_type(obj):
    """Return the NBT tag type ID that should be used to serialise ``obj``.

    Mapping:
      * ctypes ``c_byte`` / ``c_int16`` / ``c_int32`` / ``c_int64`` map to
        BYTE / SHORT / INT / LONG respectively.
      * a native ``int`` maps to the narrowest of BYTE, SHORT, INT, LONG that
        can hold it.
      * ``c_float`` maps to FLOAT; ``c_double`` and native ``float`` to DOUBLE.
      * ``bytes`` / ``bytearray`` map to BYTES, ``str`` to STRING, ``list`` to
        LIST and ``dict`` to COMPOUND.
      * a tuple maps to INTS, LONGS, BYTES or LIST depending on its elements;
        an empty ``NBTTuple`` uses its ``nbttypewhenempty``.

    Raises:
      ValueError: for an integer that does not fit in 64 bits (as a scalar or
        inside a tuple of native integers), or for a tuple whose elements do
        not share one type.
      TypeError: for an untyped empty tuple or an unsupported Python type.
    """
    if isinstance(obj, c_byte):
        return BYTE
    if isinstance(obj, c_int16):
        return SHORT
    if isinstance(obj, c_int32):
        return INT
    if isinstance(obj, c_int64):
        return LONG
    if isinstance(obj, int):
        # If passed as a Python type, assume type is insignificant and use the
        # shortest possible representation for that integer. ~obj maps a
        # negative value onto the non-negative value with the same bit width.
        ref = obj if obj >= 0 else ~obj
        if not (ref >> 7):
            return BYTE
        if not (ref >> 15):
            return SHORT
        if not (ref >> 31):
            return INT
        if not (ref >> 63):
            return LONG
        raise ValueError("integer too large to be stored in NBT ({!r})".format(obj))
    if isinstance(obj, c_float):
        return FLOAT
    if isinstance(obj, (c_double, float)):
        # Python's native "float" is actually double-precision.
        # Since the different float types are as much a matter of precision as
        # maximum, don't try to pick the narrowest one like with integers.
        return DOUBLE
    if isinstance(obj, (bytes, bytearray)):
        return BYTES
    if isinstance(obj, str):
        return STRING
    if isinstance(obj, list):
        return LIST
    if isinstance(obj, dict):
        return COMPOUND
    if isinstance(obj, tuple):
        if not obj:
            if isinstance(obj, NBTTuple):
                return obj.nbttypewhenempty
            raise TypeError("cannot automatically represent untyped empty tuple")

        # A tuple made solely of native Python integers becomes an integer
        # array. BYTES is never chosen here since it would not round-trip to
        # a tuple.
        if all(isinstance(i, int) for i in obj):
            array_type = INTS
            for i in obj:
                ref = i if i >= 0 else ~i
                if ref >> 63:
                    # Reject here with the same error as for scalars and
                    # lists, instead of failing later inside struct.pack.
                    raise ValueError(
                        "integer too large to be stored in NBT ({!r})".format(i))
                if ref >> 31:
                    array_type = LONGS
            return array_type

        # Anything else: every element must deduce to the same type, and
        # native integers may not be mixed in.
        inner = _deduce_type(obj[0])
        for i in obj:
            if isinstance(i, int):
                raise ValueError("mixed native and fixed-width integers in a tuple")
            if _deduce_type(i) != inner:
                raise ValueError("mixed-type tuples forbidden in NBT")
        if inner == BYTE:  # Native integers are excluded, so these are c_byte.
            return BYTES
        if inner == INT:
            return INTS
        if inner == LONG:
            return LONGS
        return LIST
    raise TypeError("unsupported type {!r}".format(type(obj)))
def _deduce_list_type(lst):
    """Return the NBT element type ID to declare for the sequence ``lst``.

    An empty ``NBTList`` yields its ``nbttypewhenempty``; any other empty
    sequence yields BYTE. A sequence made only of native ``int`` values
    yields the narrowest of BYTE, SHORT, INT, LONG able to hold every
    element. Otherwise every element must deduce (via ``_deduce_type``) to
    the same type, which is returned.

    Raises:
      ValueError: if native and fixed-width (ctypes) integers are mixed, in
        either order; if the elements do not share one type; or if a native
        integer does not fit in 64 bits.
      TypeError: propagated from ``_deduce_type`` for unsupported elements.
    """
    if not lst:
        if isinstance(lst, NBTList):
            return lst.nbttypewhenempty
        # Empty list without a set type. END would also be possible, but that
        # apparently breaks some older parsers.
        return BYTE

    native = -1  # Sentinel status: every element seen so far is a native int.
    fixed_width = (c_byte, c_int16, c_int32, c_int64)
    status = None
    for item in lst:
        if isinstance(item, int):
            if status in (None, native):
                status = native
            elif status in (BYTE, SHORT, INT, LONG):
                # Checked before any type comparison so that the outcome does
                # not depend on element order or on the integer's magnitude.
                raise ValueError("mixed native and fixed-width integers in a list")
            else:
                raise ValueError("mixed-type lists forbidden in NBT")
            continue
        item_type = _deduce_type(item)
        if status is None:
            status = item_type
        elif status == native:
            if isinstance(item, fixed_width):
                raise ValueError("mixed native and fixed-width integers in a list")
            raise ValueError("mixed-type lists forbidden in NBT")
        elif status != item_type:
            raise ValueError("mixed-type lists forbidden in NBT")

    if status != native:
        return status

    # All native integers: pick the narrowest type that fits every element.
    # The type IDs BYTE < SHORT < INT < LONG are ordered by width, so max()
    # keeps the widest requirement seen so far.
    typ = BYTE
    for i in lst:
        ref = i if i >= 0 else ~i
        if ref >> 63:
            raise ValueError("integer too large to be stored in NBT ({!r})".format(i))
        if ref >> 31:
            typ = LONG
        elif ref >> 15:
            typ = max(typ, INT)
        elif ref >> 7:
            typ = max(typ, SHORT)
    return typ
def _dump(item, file, typ, endian):
    """Write the payload of ``item`` to ``file`` as NBT type ``typ``.

    Only the payload is written; the tag byte and name of the item itself are
    the caller's responsibility. ``endian`` is a struct-module byte-order
    prefix. Lists and compounds recurse into their elements.

    Raises:
      ValueError: if a compound key is not a string.
      RuntimeError: if ``typ`` is not a known tag type ID.
    """
    # GENERAL NOTE: not using bytes() on the ctypes types besides c_byte means
    # the host's native formats do not matter. NBT is big-endian in MCJE and
    # little-endian in MCBE.
    if typ == BYTE:
        if isinstance(item, c_byte):
            file.write(bytes(item))
        else:
            # bytes() requires an unsigned representation, hence the & 0xFF.
            file.write(bytes([item & 0xFF]))
    elif typ == SHORT:
        value = item.value if isinstance(item, c_int16) else item
        file.write(struct.pack(endian + "h", value))
    elif typ == INT:
        value = item.value if isinstance(item, c_int32) else item
        file.write(struct.pack(endian + "l", value))
    elif typ == LONG:
        value = item.value if isinstance(item, c_int64) else item
        file.write(struct.pack(endian + "q", value))
    elif typ == FLOAT:
        # FLOAT is only ever deduced for c_float, so .value is always present.
        file.write(struct.pack(endian + "f", item.value))
    elif typ == DOUBLE:
        value = item.value if isinstance(item, c_double) else item
        file.write(struct.pack(endian + "d", value))
    elif typ == BYTES:
        file.write(struct.pack(endian + "l", len(item)))
        # Might be passed to us as bytes, bytearray or tuple.
        if item and isinstance(item[0], c_byte):
            # Accepted for consistency with such representations for INTS and
            # LONGS but not recommended; loading returns a bytes object rather
            # than this format.
            file.write(b"".join(bytes(i) for i in item))
        else:
            file.write(bytes(item))
    elif typ == STRING:
        encoded = item.encode("utf-8")
        # The string length prefix is an unsigned 16-bit value, like the
        # length prefix of names below; a signed "h" would reject strings of
        # 32768..65535 encoded bytes.
        file.write(struct.pack(endian + "H", len(encoded)))
        file.write(encoded)
    elif typ == LIST:
        inner = _deduce_list_type(item)
        file.write(struct.pack(endian + "B", inner))
        file.write(struct.pack(endian + "l", len(item)))
        for element in item:
            _dump(element, file, inner, endian)
    elif typ == COMPOUND:
        for name, value in item.items():
            value_type = _deduce_type(value)
            if not isinstance(name, str):
                raise ValueError("keys must be strings")
            bname = name.encode("utf-8")
            file.write(struct.pack(endian + "B", value_type))
            file.write(struct.pack(endian + "H", len(bname)))
            file.write(bname)
            _dump(value, file, value_type, endian)
        # An END tag terminates the compound.
        file.write(struct.pack(endian + "B", END))
    elif typ == INTS:
        file.write(struct.pack(endian + "l", len(item)))
        if item:
            # Mixed ctypes and native int would already have been rejected
            # during type deduction, so inspecting the first element suffices.
            if isinstance(item[0], c_int32):
                values = tuple(i.value for i in item)
            else:
                values = item
            file.write(struct.pack(endian + ("l" * len(item)), *values))
    elif typ == LONGS:
        file.write(struct.pack(endian + "l", len(item)))
        if item:
            # Same reasoning as for INTS above.
            if isinstance(item[0], c_int64):
                values = tuple(i.value for i in item)
            else:
                values = item
            file.write(struct.pack(endian + ("q" * len(item)), *values))
    else:
        raise RuntimeError("unexpected `typ` value; this should not happen ({!r})".format(typ))
def dump(obj, file, *, name=None, endian=">"):
    """ Write an object (obj) as uncompressed NBT to a file (file).

    The optional name argument gives a name to the root tag; it must be a
    string if given, otherwise ValueError is raised.

    The endian argument takes the same format as the struct module. It
    should be set to ">" for MCJE data (the default) or to "<" for
    MCBE (MCPE) data.
    """
    typ = _deduce_type(obj)
    # Validate and encode the name before writing anything, so that a bad
    # name does not leave a stray tag byte in the output.
    if name is None:
        bname = b""
    elif not isinstance(name, str):
        raise ValueError("keys must be strings")
    else:
        bname = name.encode("utf-8")
    # The two header fields are packed separately so that no alignment
    # padding can be inserted, whatever byte-order prefix is supplied.
    header = struct.pack(endian + "B", typ) + struct.pack(endian + "H", len(bname))
    file.write(header)
    file.write(bname)
    _dump(obj, file, typ, endian)
def dumps(obj, *, name=None, endian=">"):
    """ Serialise an object (obj) to uncompressed NBT and return the bytes.

    name, if given, becomes the name of the root tag.

    endian is a struct-module byte-order prefix: ">" (the default) for
    MCJE data, "<" for MCBE (MCPE) data.
    """
    with io.BytesIO() as buffer:
        dump(obj, buffer, name=name, endian=endian)
        return buffer.getvalue()
def _load(file, typ, preserve, endian):
    """Read one payload of NBT type ``typ`` from ``file`` and return it.

    Only the payload is read; the tag byte and name have already been
    consumed by the caller. ``endian`` is a struct-module byte-order prefix.

    With ``preserve`` true, numbers come back as ctypes wrappers, lists as
    ``NBTList``, integer arrays as ``NBTTuple`` of ctypes values and
    compounds as ``OrderedDict``. With it false, plain Python ints, floats,
    lists, tuples and dicts are returned. Byte arrays are returned as
    ``bytes`` and strings as ``str`` in both modes.

    Raises:
      ValueError: on an unknown tag type or a duplicate compound key.
    """
    if typ == BYTE:
        # NBT bytes are signed; unpack with "b" so that preserve=False yields
        # e.g. -1 rather than 255, matching what the dumper writes.
        ret, = struct.unpack(endian + "b", file.read(1))
        return c_byte(ret) if preserve else ret
    elif typ == SHORT:
        ret, = struct.unpack(endian + "h", file.read(2))
        return c_int16(ret) if preserve else ret
    elif typ == INT:
        ret, = struct.unpack(endian + "l", file.read(4))
        return c_int32(ret) if preserve else ret
    elif typ == LONG:
        ret, = struct.unpack(endian + "q", file.read(8))
        return c_int64(ret) if preserve else ret
    elif typ == FLOAT:
        ret, = struct.unpack(endian + "f", file.read(4))
        return c_float(ret) if preserve else ret
    elif typ == DOUBLE:
        ret, = struct.unpack(endian + "d", file.read(8))
        return c_double(ret) if preserve else ret
    elif typ == BYTES:
        length, = struct.unpack(endian + "l", file.read(4))
        return file.read(length)
    elif typ == STRING:
        # The string length prefix is unsigned 16-bit, like tag name lengths.
        length, = struct.unpack(endian + "H", file.read(2))
        return file.read(length).decode("utf-8", errors="replace")
    elif typ == LIST:
        inner, = struct.unpack(endian + "B", file.read(1))
        length, = struct.unpack(endian + "l", file.read(4))
        ret = [_load(file, inner, preserve, endian) for _ in range(length)]
        # Keep the declared element type so an empty list can round-trip.
        return NBTList(inner, ret) if preserve else ret
    elif typ == COMPOUND:
        # OrderedDict is guaranteed to be ordered; this has only been
        # guaranteed for dict since Python 3.7 (CPython introduced it in 3.6).
        # Hence if we're preserving info, guarantee preservation of order.
        ret = OrderedDict() if preserve else {}
        while True:
            # Each entry is a complete named tag, hence load() and not _load().
            name, value = load(file, preserve=preserve, with_name=True, endian=endian)
            if value is None:  # END tag: the compound is finished.
                break
            if name in ret:
                raise ValueError("duplicate key: {!r}".format(name))
            ret[name] = value
        return ret
    elif typ == INTS:
        length, = struct.unpack(endian + "l", file.read(4))
        ret = struct.unpack(endian + ("l" * length), file.read(4 * length)) if length else ()
        if preserve:
            ret = NBTTuple(typ, (c_int32(i) for i in ret))
        return ret
    elif typ == LONGS:
        length, = struct.unpack(endian + "l", file.read(4))
        ret = struct.unpack(endian + ("q" * length), file.read(8 * length)) if length else ()
        if preserve:
            ret = NBTTuple(typ, (c_int64(i) for i in ret))
        return ret
    else:
        raise ValueError("unsupported NBT field type {!r}".format(typ))
def load(file, *, preserve=True, with_name=False, endian=">"):
    """ Read one uncompressed NBT tag from a file (file), return its content.

    preserve=True (the default) keeps the result as close to the original
    NBT data as possible (in particular, numerical values are represented
    as ctypes wrappers). preserve=False returns values in their natural
    Python representations, which may not retain the same types if dumped
    again.

    with_name=False (the default) returns just the object and raises
    ValueError if the root tag has a non-empty name. with_name=True
    returns a (name, object) tuple instead.

    If the tag read is an END tag, the result is None, or (None, None)
    when with_name=True.

    endian is a struct-module byte-order prefix: ">" (the default) for
    MCJE data, "<" for MCBE (MCPE) data.
    """
    (tag,) = struct.unpack(endian + "B", file.read(1))
    if tag == END:
        # An END tag carries neither a name nor a payload.
        return (None, None) if with_name else None

    (name_size,) = struct.unpack(endian + "H", file.read(2))
    label = file.read(name_size).decode("utf-8", errors="replace")

    # Refuse to silently drop a name the caller did not ask for; this is
    # checked before the payload is read.
    if label and not with_name:
        raise ValueError("named root tag without with_name=True")

    payload = _load(file, tag, preserve, endian)
    if with_name:
        return label, payload
    return payload
def loads(dat, *, preserve=True, with_name=False, endian=">"):
    """ Decode an object from uncompressed NBT byte data (dat).

    preserve=True (the default) keeps the result as close to the original
    NBT data as possible (in particular, numerical values are represented
    as ctypes wrappers). preserve=False returns values in their natural
    Python representations, which may not retain the same types if dumped
    again.

    with_name=False (the default) returns just the object; an error is
    raised if the root tag is named. with_name=True returns a tuple of the
    name (even if an empty string) and the object. Both of these are None
    in the event of an END tag.

    endian is a struct-module byte-order prefix: ">" (the default) for
    MCJE data, "<" for MCBE (MCPE) data.
    """
    stream = io.BytesIO(dat)
    options = {"preserve": preserve, "with_name": with_name, "endian": endian}
    return load(stream, **options)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment