-
-
Save nwesterhausen/527fb947d4432c1f40c06dca07cb9253 to your computer and use it in GitHub Desktop.
"""Code to read .mca region files | |
I modified the javascript library mca-js to create this file. | |
mca-js: https://github.com/thejonwithnoh/mca-js | |
This is largely just a python interpretation of that script. | |
----------- | |
MIT License | |
Copyright (c) 2019 Nicholas Westerhausen | |
Permission is hereby granted, free of charge, to any person obtaining a copy | |
of this software and associated documentation files (the "Software"), to deal | |
in the Software without restriction, including without limitation the rights | |
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
copies of the Software, and to permit persons to whom the Software is | |
furnished to do so, subject to the following conditions: | |
The above copyright notice and this permission notice shall be included in all | |
copies or substantial portions of the Software. | |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
SOFTWARE. | |
""" | |
import gzip
import zlib
class Mca:
    """Read Minecraft region (.mca) files and the chunk data stored in them.

    Create an instance with the path of a region file, then call
    get_timestamp(chunkX, chunkZ) or get_data(chunkX, chunkZ) for individual
    chunks. Only the low 5 bits of each coordinate are used, so both
    region-relative (0..31) and absolute chunk coordinates work.

        with Mca('/opt/mc/region/r.1.1.mca') as region:
            nbt = region.get_data(0, 0)  # decompressed NBT bytes for chunk 0,0
            # hand `nbt` to an NBT parser here

    The instance keeps the file open until close() is called or the `with`
    block exits. Every chunk-addressed method takes the coordinates as
    *args in the order (chunkX, chunkZ).

    See https://minecraft.gamepedia.com/Region_file_format
    """

    SECTOR_OFFSET_SIZE = 3  # Chunk offset is a 3-byte value
    SECTOR_COUNT_SIZE = 1  # Chunk size is a 1-byte value
    TIMESTAMP_SIZE = 4  # Timestamp is a 4-byte value
    DATA_SIZE_SIZE = 4  # First 4 bytes of the chunk data are its size
    COMPRESSION_TYPE_SIZE = 1  # Compression type is a single byte
    DIMENSION_SIZE_POWER = 5  # Used for bit shifting (2**5 = 32), 32 x or z values
    DIMENSION_COUNT = 2  # There is only the X and Z dimension for the chunks.
    SECTOR_SIZE_POWER = 12  # Used for bit shifting (2**12 = 4096), 4096 bytes per sector of the file
    SECTOR_DETAILS_SIZE = SECTOR_OFFSET_SIZE + SECTOR_COUNT_SIZE  # Full size of one location entry (4 bytes)
    DATA_HEADER_SIZE = DATA_SIZE_SIZE + COMPRESSION_TYPE_SIZE  # Full size of the chunk header (5 bytes)
    DIMENSION_SIZE = 1 << DIMENSION_SIZE_POWER  # Chunks per region along one axis (32)
    DIMENSION_SIZE_MASK = DIMENSION_SIZE - 1  # DIM_SIZE = 0b100000, MASK = 0b11111; keeps a coordinate in 0..31
    # One entry per chunk: 32 ** 2 = 1024. (Previously computed as 32 * 2 = 64, which was wrong.)
    INDEX_COUNT = DIMENSION_SIZE ** DIMENSION_COUNT
    # Size of the location table: 1024 entries * 4 bytes = 4096. (Previously 68, which was wrong.)
    HEADER_SIZE = SECTOR_DETAILS_SIZE * INDEX_COUNT
    SECTOR_SIZE = 1 << SECTOR_SIZE_POWER  # 4096 bytes

    # Compression types
    COMPRESSION_GZIP = 1
    COMPRESSION_ZLIB = 2
    COMPRESSION_NONE = 3  # Payload is stored uncompressed

    def __init__(self, filepath):
        """Open the region file at `filepath` for binary reading.

        filepath: full path to the region file (e.g. /opt/mc/region/r.1.1.mca)

        Raises OSError (e.g. FileNotFoundError) if the file cannot be opened.
        """
        # Read-only: this class never writes, and 'rb' also works on files
        # the process has no write permission for.
        self.data = open(filepath, 'rb')

    def close(self):
        """Close the underlying region file. Safe to call more than once."""
        self.data.close()

    def __enter__(self):
        """Return self so the instance can be used in a `with` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the region file when the `with` block exits."""
        self.close()

    def _read_int(self, offset, size):
        """Seek to the absolute `offset` and read `size` bytes as a big-endian int.

        A short read (offset at or past end of file) yields a smaller value
        or 0 instead of raising.
        """
        self.data.seek(offset, 0)
        return int.from_bytes(self.data.read(size), 'big')

    def get_index(self, *args):
        """Return the table index for the chunk at (chunkX, chunkZ).

        The same index locates the chunk's entry in both the location table
        and the timestamp table at the start of the region file. It is
        (x & 31) + (z & 31) * 32.
        See https://minecraft.gamepedia.com/Region_file_format#Structure

        Raises IndexError if fewer than DIMENSION_COUNT coordinates are given.
        """
        index = 0
        for dimension in range(self.DIMENSION_COUNT):
            coordinate = args[dimension] & self.DIMENSION_SIZE_MASK
            index |= coordinate << (dimension * self.DIMENSION_SIZE_POWER)
        return index

    def get_sector_offset_offset(self, *args):
        """Return the file position of the chunk's 3-byte sector offset.

        That 3-byte value is the chunk's position in 4KiB sectors from the
        start of the file.
        See https://minecraft.gamepedia.com/Region_file_format#Chunk_location
        """
        return self.get_index(*args) * self.SECTOR_DETAILS_SIZE

    def get_sector_count_offset(self, *args):
        """Return the file position of the chunk's 1-byte sector count.

        That byte gives the length of the chunk in 4KiB sectors, rounded up.
        See https://minecraft.gamepedia.com/Region_file_format#Chunk_location
        """
        return self.get_sector_offset_offset(*args) + self.SECTOR_OFFSET_SIZE

    def get_timestamp_offset(self, *args):
        """Return the file position of the chunk's 4-byte modification timestamp.

        The timestamp table occupies the second 4KiB sector of the file,
        directly after the location table.
        See https://minecraft.gamepedia.com/Region_file_format#Chunk_timestamps
        """
        return self.get_index(*args) * self.TIMESTAMP_SIZE + self.SECTOR_SIZE

    def get_sector_offset(self, *args):
        """Return how many 4096-byte sectors from the start of the file the chunk begins."""
        return self._read_int(self.get_sector_offset_offset(*args), self.SECTOR_OFFSET_SIZE)

    def get_data_offset(self, *args):
        """Return the byte position where the chunk's data begins.

        This is the sector offset multiplied by 4096 (done with a bit shift).
        A result of 0 is treated by get_data as "no chunk stored".
        """
        return self.get_sector_offset(*args) << self.SECTOR_SIZE_POWER

    def get_sector_count(self, *args):
        """Return how many 4096-byte sectors the chunk's data occupies."""
        return self._read_int(self.get_sector_count_offset(*args), self.SECTOR_COUNT_SIZE)

    def get_timestamp(self, *args):
        """Return the chunk's last-modified timestamp as an int."""
        return self._read_int(self.get_timestamp_offset(*args), self.TIMESTAMP_SIZE)

    def get_data_size(self, *args):
        """Return the length field stored in the first 4 bytes of the chunk data.

        The value counts the compression-type byte plus the compressed
        payload, so the payload itself is one byte shorter.
        See https://minecraft.gamepedia.com/Region_file_format#Chunk_data
        """
        return self._read_int(self.get_data_offset(*args), self.DATA_SIZE_SIZE)

    def get_compression_type(self, *args):
        """Return the chunk's compression type byte.

        1 is GZip, 2 is Zlib and 3 is uncompressed (see the COMPRESSION_*
        constants).
        """
        offset = self.get_data_offset(*args) + self.DATA_SIZE_SIZE
        return self._read_int(offset, self.COMPRESSION_TYPE_SIZE)

    def get_data(self, *args):
        """Return the decompressed NBT bytes for the chunk at (chunkX, chunkZ).

        Returns None when the chunk has no data in this region (its sector
        offset is 0), when its length field leaves no payload, or when its
        compression type is not one of the COMPRESSION_* constants.

        Raises zlib.error, gzip.BadGzipFile or EOFError if the stored payload
        is corrupt.
        """
        datastart = self.get_data_offset(*args)
        if datastart == 0:
            return None

        # Read the 5-byte chunk header (length + compression type) in one go.
        self.data.seek(datastart, 0)
        header = self.data.read(self.DATA_HEADER_SIZE)
        datasize = int.from_bytes(header[:self.DATA_SIZE_SIZE], 'big')
        compressiontype = int.from_bytes(header[self.DATA_SIZE_SIZE:], 'big')

        # The length field includes the compression-type byte, so the payload
        # is one byte shorter. Reading the full length would pull in one
        # trailing byte that is not part of the compressed stream.
        payloadsize = datasize - self.COMPRESSION_TYPE_SIZE
        if payloadsize <= 0:
            return None
        payload = self.data.read(payloadsize)

        if compressiontype == self.COMPRESSION_GZIP:
            return gzip.decompress(payload)
        if compressiontype == self.COMPRESSION_ZLIB:
            return zlib.decompress(payload)
        if compressiontype == self.COMPRESSION_NONE:
            return payload
        return None
I ended up going a few steps beyond this script as well. I don't have the free time right now to dive back into it, but maybe it can help you.
Hey, I think this adds an extra byte to the payloadsize? Recompressing the uncompressed data leaves it 1 byte short of the original compressed string but otherwise exactly equal. If you subtract 1 from the payloadsize variable, this doesn't happen.
Edit: I believe I have found the issue. In get_timestamp_offset, self.HEADER_SIZE is being used but it should be replaced by self.SECTOR_SIZE.
Original message:
Hello! I have been using this library and it worked great for a while, but I have run into a new issue. For every new region I generate, I get exactly 17 chunks with the correct timestamps, and the 1007 others give me a number which is much lower than it should be. I have tried this with a new server and with a vanilla setup, and it still happens. I have opened those region files in another program and it can display the chunk timestamps properly, so it seems to be an issue with this library. I have tried to understand what could cause this, but I just don't see how it can happen.
The issue is easy to reproduce: make a new world with Minecraft Java Edition 1.20.1 and make sure one region file is fully generated.
By using a simple script with a loop or two you can print out all the timestamps. You should have 17 correct ones and 1007 that are extremely low; they don't even reach a week past 1970-01-01.
Thank you again for this, hopefully we can figure out what's happening here!
Edit: I believe I have found the issue. In get_timestamp_offset, self.HEADER_SIZE is being used but it should be replaced by self.SECTOR_SIZE.
Made the change in the gist
Hey! I am working on a project that is a few steps beyond your library. Would you like to collaborate on it?