Skip to content

Instantly share code, notes, and snippets.

@nwesterhausen
Last active October 26, 2023 14:34
Show Gist options
  • Save nwesterhausen/527fb947d4432c1f40c06dca07cb9253 to your computer and use it in GitHub Desktop.
Save nwesterhausen/527fb947d4432c1f40c06dca07cb9253 to your computer and use it in GitHub Desktop.
Python class to read minecraft region files.
"""Code to read .mca region files
I adapted the JavaScript library mca-js to create this file.
mca-js: https://github.com/thejonwithnoh/mca-js
This is largely a Python port of that script.
-----------
MIT License
Copyright (c) 2019 Nicholas Westerhausen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
import gzip
import zlib
class Mca:
    """Read Minecraft region (.mca) files and the chunks stored in them.

    Create an instance with the path to a region file, then call
    get_timestamp(chunkX, chunkZ) or get_data(chunkX, chunkZ). Coordinates
    are masked to the region-local range 0..31 (see get_index).

    The instance keeps the file open until close() is called; it can also be
    used as a context manager:

        with Mca('/opt/mc/region/r.1.1.mca') as region:
            nbt = region.get_data(0, 0)  # decompressed bytes for chunk 0,0
            # parse `nbt` with any NBT reader
    """

    SECTOR_OFFSET_SIZE = 3  # Chunk offset is a 3-byte value
    SECTOR_COUNT_SIZE = 1  # Chunk sector count is a 1-byte value
    TIMESTAMP_SIZE = 4  # Timestamp is a 4-byte value
    DATA_SIZE_SIZE = 4  # First 4 bytes of the chunk data are its length
    COMPRESSION_TYPE_SIZE = 1  # Compression type is a single byte
    DIMENSION_SIZE_POWER = 5  # Used for bit shifting (2**5 = 32 chunks per axis)
    DIMENSION_COUNT = 2  # Chunks are addressed by X and Z only
    SECTOR_SIZE_POWER = 12  # Used for bit shifting (2**12 = 4096 bytes per sector)

    # Size of one location-table entry: 3-byte offset + 1-byte count = 4 bytes
    SECTOR_DETAILS_SIZE = SECTOR_OFFSET_SIZE + SECTOR_COUNT_SIZE
    # Size of the per-chunk header: 4-byte length + 1-byte compression = 5 bytes
    DATA_HEADER_SIZE = DATA_SIZE_SIZE + COMPRESSION_TYPE_SIZE
    DIMENSION_SIZE = 1 << DIMENSION_SIZE_POWER  # 32 (0b100000)
    DIMENSION_SIZE_MASK = DIMENSION_SIZE - 1  # 31 (0b11111), keeps the low 5 bits
    # One table entry per chunk in the region: 32 ** 2 = 1024
    INDEX_COUNT = DIMENSION_SIZE ** DIMENSION_COUNT
    # Size of the location table: 1024 entries * 4 bytes = 4096 bytes
    HEADER_SIZE = SECTOR_DETAILS_SIZE * INDEX_COUNT
    SECTOR_SIZE = 1 << SECTOR_SIZE_POWER  # 4096 bytes

    # Compression types
    COMPRESSION_GZIP = 1
    COMPRESSION_ZLIB = 2
    COMPRESSION_NONE = 3  # Payload is stored uncompressed

    def __init__(self, filepath):
        """Open the region file at `filepath` for binary reading.

        filepath: full path to the region file (e.g. /opt/mc/region/r.1.1.mca)

        Most methods take the chunk coordinates as *args in the order
        (chunkX, chunkZ).
        """
        # Read-only: this class never writes, and 'r+b' would both fail on
        # read-only files and permit accidental modification of world data.
        self.data = open(filepath, 'rb')

    def close(self):
        """Close the underlying region file."""
        self.data.close()

    def __enter__(self):
        """Return self so the instance can be used in a `with` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file on leaving the `with` block; never suppress errors."""
        self.close()
        return False

    def get_index(self, *args):
        """Return the table index for the chunk at (chunkX, chunkZ).

        Each coordinate is masked to its low 5 bits and the results are packed
        as x + z * 32. The same index is used for both the location table and
        the timestamp table at the start of the region file.
        See https://minecraft.gamepedia.com/Region_file_format#Structure
        """
        index = 0
        for dimension in range(self.DIMENSION_COUNT):
            masked = args[dimension] & self.DIMENSION_SIZE_MASK
            index |= masked << (dimension * self.DIMENSION_SIZE_POWER)
        return index

    def get_sector_offset_offset(self, *args):
        """Return the file position of the chunk's location-table entry.

        The entry starts with the three-byte offset (in 4KiB sectors from the
        start of the file) of where the chunk data is stored.
        See https://minecraft.gamepedia.com/Region_file_format#Chunk_location
        """
        return self.get_index(*args) * self.SECTOR_DETAILS_SIZE

    def get_sector_count_offset(self, *args):
        """Return the file position of the chunk's sector-count byte.

        That byte gives the length of the chunk in 4KiB sectors, rounded up.
        See https://minecraft.gamepedia.com/Region_file_format#Chunk_location
        """
        return self.get_sector_offset_offset(*args) + self.SECTOR_OFFSET_SIZE

    def get_timestamp_offset(self, *args):
        """Return the file position of the chunk's 4-byte timestamp.

        The timestamp table follows the location table, so it starts one
        sector (4096 bytes) into the file.
        See https://minecraft.gamepedia.com/Region_file_format#Chunk_timestamps
        """
        return self.get_index(*args) * self.TIMESTAMP_SIZE + self.SECTOR_SIZE

    def get_sector_offset(self, *args):
        """Return how many 4096-byte sectors into the file the chunk starts.

        A value of 0 is treated by get_data as "chunk not present".
        """
        self.data.seek(self.get_sector_offset_offset(*args), 0)
        return int.from_bytes(self.data.read(self.SECTOR_OFFSET_SIZE), 'big')

    def get_data_offset(self, *args):
        """Return the byte position where the chunk data begins.

        This is the sector offset multiplied by 4096 (done with a bit shift).
        """
        return self.get_sector_offset(*args) << self.SECTOR_SIZE_POWER

    def get_sector_count(self, *args):
        """Return how many 4096-byte sectors the chunk data occupies."""
        self.data.seek(self.get_sector_count_offset(*args), 0)
        return int.from_bytes(self.data.read(self.SECTOR_COUNT_SIZE), 'big')

    def get_timestamp(self, *args):
        """Return the chunk's last-modified timestamp as an int."""
        self.data.seek(self.get_timestamp_offset(*args), 0)
        return int.from_bytes(self.data.read(self.TIMESTAMP_SIZE), 'big')

    def get_data_size(self, *args):
        """Return the length field stored in the first 4 bytes of the chunk.

        Per the region file format this length counts the compression-type
        byte plus the compressed payload, so the payload itself is one byte
        shorter than the returned value.
        See https://minecraft.gamepedia.com/Region_file_format#Chunk_data
        """
        self.data.seek(self.get_data_offset(*args), 0)
        return int.from_bytes(self.data.read(self.DATA_SIZE_SIZE), 'big')

    def get_compression_type(self, *args):
        """Return the compression type byte for the chunk.

        1 is GZip, 2 is Zlib and 3 is uncompressed.
        """
        self.data.seek(self.get_data_offset(*args) + self.DATA_SIZE_SIZE, 0)
        return int.from_bytes(self.data.read(self.COMPRESSION_TYPE_SIZE), 'big')

    def get_data(self, *args):
        """Return the decompressed NBT bytes for the chunk at (chunkX, chunkZ).

        Returns None when the chunk is not present in the region (its sector
        offset is 0) or when its compression type is not recognised.
        Decompression errors from gzip/zlib propagate to the caller.
        """
        data_start = self.get_data_offset(*args)
        if data_start == 0:
            return None

        # Read the 5-byte chunk header once instead of re-seeking per field.
        self.data.seek(data_start, 0)
        header = self.data.read(self.DATA_HEADER_SIZE)
        length = int.from_bytes(header[:self.DATA_SIZE_SIZE], 'big')
        compression_type = int.from_bytes(header[self.DATA_SIZE_SIZE:], 'big')

        # The length field includes the compression-type byte, so the payload
        # is one byte shorter. Reading `length` bytes would append a stray
        # byte that gzip rejects. Clamp at 0 so a corrupt zero length cannot
        # turn into read(-1), which would read the rest of the file.
        payload_size = max(length - self.COMPRESSION_TYPE_SIZE, 0)
        payload = self.data.read(payload_size)

        if compression_type == self.COMPRESSION_GZIP:
            return gzip.decompress(payload)
        if compression_type == self.COMPRESSION_ZLIB:
            return zlib.decompress(payload)
        if compression_type == self.COMPRESSION_NONE:
            return payload
        return None
@nwesterhausen
Copy link
Author

Edit: I believe I have found the issue. In get_timestamp_offset, self.HEADER_SIZE is being used but it should be replaced by self.SECTOR_SIZE.

Made the change in the gist

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment