kfsone/tdbinary.py Secret

## tdbinary.py
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from struct import pack, unpack
import os
import time

MAGIC = "SILS"  # Station Item Listing
FORMAT = 1  # format version; written as 4 hex digits after MAGIC
EPOCH = 1500000000  # subtracted from timestamps before they are packed

# struct layout of one packed Item record (see Item.pack / Item.unpack).
ITEM_FMT = (
    "!"         # Use network endianness (big-endian, no alignment padding)
    "I"         # item_id: u32
    "I"         # fdev_id: u32
    "I"         # avg_price: u32
    "H"         # category_id: u16
    "H"         # ui_order: u16
    "40s"       # name
)
ITEM_LEN = 4 + 4 + 4 + 2 + 2 + 40  # 56
print("-- Item Struct Len:", ITEM_LEN)
# At time of writing, there are 400 items in the game,
# so assuming a maximum of 512 for the time being will
# allow us to use 64 bytes (eight u64s) to represent all
# of them in an availability-bit-mask.
ITEM_LIMIT = 512
# Floor division keeps these as ints: they are used as buffer sizes and
# bytearray() rejects a float length.
ITEM_BYTES = ITEM_LIMIT // 8
ITEM_U64S = ITEM_BYTES // 8  # 8 = 8 bytes per u64


@dataclass(slots=True, frozen=True)
class Item:
    item_id:        int     # u32
    name:           bytes   # [40; u8]
    category_id:    int     # u16
    ui_order:       int     # u16
    avg_price:      int     # u32
    fdev_id:        int     # u32

    def pack(self):
        return pack(ITEM_FMT, self.item_id, self.fdev_id, self.avg_price, self.category_id, self.ui_order, self.name)

    @staticmethod
    def unpack(source: bytes):
        item_id, fdev_id, avg_price, category_id, ui_order, name = unpack(
            ITEM_FMT, source)
        return Item(item_id, name, category_id, ui_order, avg_price, fdev_id)


class StationFlags(Enum):
    """Names for the eight station service bits.

    Each value is a bit position, not a mask. The positions match the order
    in which read_station_table() folds the CSV columns into the
    known_flags / flags bytes. The enum is not referenced by the rest of
    this script.
    """
    MARKET = 0
    BLACK_MARKET = 1
    SHIPYARD = 2
    OUTFITTING = 3
    REARM = 4
    REFUEL = 5
    REPAIR = 6
    PLANET = 7


# struct layout of one packed Station record (see Station.pack / unpack).
# Adjacent string literals concatenate into a single format string.
STATION_FMT = (
    "!"         # network byte order
    "Q"         # station_id: u64
    "I"         # system_id: u32
    "I"         # modified: u32 (minus epoch)
    "f"         # jump_ls: float
    "h"         # type_id: i16
    "B"         # pads: u8
    "B"         # known_flags: u8
    "B"         # flags: u8
    "40s"       # name
    "x"         # pad byte
)
# Per-field byte widths, in the same order as the format above.
STATION_LEN = sum((8, 4, 4, 4, 2, 1, 1, 1, 40, 1))
print("-- Station Struct Len:", STATION_LEN)


@dataclass(slots=True, frozen=True)
class Station:
    station_id:  int    # u32
    name:        bytes  # [40; u8]
    system_id:   int    # u64
    jump_ls:     float  # f32
    known_flags: int    # u8
    flags:       int    # u8
    pads:        int    # u8
    type_id:     int    # u16
    modified:    int    # u32  (minus epoch)
    supply:      bytes  # [ITEM_BYTES; u8]
    demand:      bytes  # [ITEM_BYTES; u8]

    def pack(self):
        return pack(STATION_FMT, self.system_id, self.station_id, self.modified - EPOCH, self.jump_ls, self.type_id, self.pads, self.known_flags, self.flags, self.name)

    def populate_availability(self, item_lids, supply, demand):
        self.supply = bytearray(ITEM_BYTES)  # one byte per 8 max items
        for item_lid, listing in supply:
            # the entries aren't sequential, calculate the byte and bit
            # this will live in - I actually think it should probably be
            # u64s rather than bytes but bytes are easier in python
            byte_no, bit_no = item_lid >> 3, item_lid & 0x07
            supply_mask[byte_no] |= (1 << bit_no)

    @staticmethod
    def unpack(source: bytes):
        station_id, system_id, modified, jump_ls, type_id, pads, known_flags, flags, name = unpack(
            STATION_FMT, source)
        modified += EPOCH if modified else 0
        return Station(station_id, name, system_id, jump_ls, known_flags, flags, pads, type_id, modified, bytearray(ITEM_BYTES), bytearray(ITEM_BYTES))


# struct layout of one packed Listing record. The fields are named in the
# order Listing.pack() writes them; all four are u32.
LISTING_FMT = (
    "!"     # network byte order
    "I"     # u32: price
    "I"     # u32: units
    "I"     # u32: level (actually a u8 with padding)
    "I"     # u32: modified (minus epoch)
)
LISTING_LEN = 4 + 4 + 4 + 4


@dataclass(slots=True, frozen=True)
class Listing:
    price:          int     # u32
    units:          int     # u32
    level:          int     # u8 padd to u32 for convenience
    modified:       int     # u32  (minus epoch)

    def pack(self):
        modified = self.modified - EPOCH if self.modified else 0
        return pack(LISTING_FMT, self.price, self.units, self.level, modified)

    @staticmethod
    def unpack(source: bytes):
        price, units, level, modified = unpack(ITEM_FMT, source)
        modified = modified + EPOCH if modified else 0
        return Listing(price, units, level, modified)


# Locate each input CSV: use the copy in the working directory when there
# is one, otherwise fall back to the path under data/.
item_csv = "Item.csv" if os.path.exists("Item.csv") else "data/Item.csv"

# listings.csv has one more candidate location: eddb/ is tried before
# falling back to data/eddb/.
if os.path.exists("listings.csv"):
    listings_csv = "listings.csv"
elif os.path.exists("eddb/listings.csv"):
    listings_csv = "eddb/listings.csv"
else:
    listings_csv = "data/eddb/listings.csv"

station_csv = "Station.csv" if os.path.exists("Station.csv") else "data/Station.csv"


def read_item_table():
    """Load the item CSV named by item_csv.

    The first line is skipped as a header. Every other non-blank line must
    have exactly six comma-separated fields: id, quoted name, category,
    ui_order, avg_price, fdev_id. Local ids are assigned in file order,
    starting at 0.

    Returns:
        (items, item_ids, item_lids): items maps real id -> Item,
        item_ids maps real id -> local id, and item_lids is the list of
        real ids indexed by local id.

    Raises:
        ValueError: a line has the wrong number of fields or a non-numeric
            value where an integer is expected.
    """
    print("++ Reading Item.csv")

    items = {}  # real id -> Item
    item_ids = {}  # real id -> local id
    item_lids = []  # local id -> real id

    with open(item_csv, "rb") as item_fh:
        next(item_fh, None)  # skip header; the default tolerates an empty file
        for line in item_fh:
            if not line.strip():
                continue  # ignore blank lines, e.g. a trailing newline
            (item_id_text, name, category, ui_order,
             avg_price, fdev_id) = line.split(b',')
            item_id = int(item_id_text)
            # name[1:-1] drops the first and last byte of the name field
            # (the surrounding quote characters).
            items[item_id] = Item(item_id, name[1:-1], int(category),
                                  int(ui_order), int(avg_price), int(fdev_id))
            item_ids[item_id] = len(item_lids)
            item_lids.append(item_id)

    if items:
        print(f"-- {len(item_lids):,} items, {min(items):,}-{max(items):,}")
    else:
        # min()/max() raise on an empty table, so report the count alone.
        print("-- 0 items")

    return items, item_ids, item_lids


def read_station_table():
    """Load the station CSV named by station_csv.

    The first line is skipped as a header. Every other non-blank line must
    have exactly fifteen comma-separated fields, in the order unpacked
    below. Local ids are assigned in file order, starting at 0. The
    stations are created with empty supply and demand masks.

    Returns:
        (stations, station_ids, station_lids): stations maps real id ->
        Station, station_ids maps real id -> local id, and station_lids
        is the list of real ids indexed by local id.

    Raises:
        ValueError: a line has the wrong number of fields or a value that
            cannot be parsed.
    """
    print("++ Reading Station.csv")

    stations = {}  # real id -> Station
    station_ids = {}  # real id -> local id
    station_lids = []  # local id -> real id

    # Pad size code -> stored value; anything else (blank, '?') becomes 0.
    # An exact lookup is used because bytes.find() would report a blank
    # field as a match at index 0, i.e. as a small pad.
    pad_sizes = {b"S": 1, b"M": 2, b"L": 3}

    with open(station_csv, "rb") as station_fh:
        next(station_fh, None)  # skip header; the default tolerates an empty file
        for line in station_fh:
            if not line.strip():
                continue  # ignore blank lines, e.g. a trailing newline
            (station_id_text, name, system, jump_ls, blackmarket, max_pad, market, shipyard,
             modified, outfitting, rearm, refuel, repair, planetary, type_id) = line.split(b',')
            station_id = int(station_id_text)
            # known_flags records which services have a definite Y/N
            # answer; flags records which of those answers are Y.
            known_flags, flags = 0, 0
            for bit, value in enumerate((market, blackmarket, shipyard, outfitting, rearm, refuel, repair, planetary)):
                if value == b'Y':
                    known_flags |= (1 << bit)
                    flags |= (1 << bit)
                elif value == b'N':
                    known_flags |= (1 << bit)

            max_pad_size = pad_sizes.get(max_pad, 0)

            if modified.startswith(b"'"):  # string format, need to parse it.
                # NOTE(review): an ISO string without a UTC offset is
                # interpreted in local time by timestamp() -- confirm this
                # matches how the CSV was produced.
                modified = datetime.fromisoformat(
                    modified[1:-1].decode('ascii')).timestamp()
            modified = int(modified)

            stations[station_id] = Station(station_id, name[1:-1], int(system), float(
                jump_ls), known_flags, flags, max_pad_size, int(type_id), modified, b"", b"")
            station_ids[station_id] = len(station_lids)
            station_lids.append(station_id)

    if stations:
        print(f"-- {len(station_lids):,} stations, {min(stations):,}-{max(stations):,}")
    else:
        # min()/max() raise on an empty table, so report the count alone.
        print("-- 0 stations")

    return stations, station_ids, station_lids


def read_station_listings(station_ids, item_ids):
    """Load the listings CSV named by listings_csv, grouped by station.

    The first line is skipped as a header. Every other non-blank line must
    have exactly ten comma-separated fields: id, station, item, s_units,
    s_level, s_price, d_price, d_units, d_level, modified. Consecutive
    lines with the same station field form one group. If a station shows
    up in more than one group, the later group replaces the earlier one.

    Args:
        station_ids: mapping of real station id -> local id; used only to
            reject listings for unknown stations.
        item_ids: mapping of real item id -> local item id.

    Returns:
        (supplies, demands): each maps real station id ->
        {local item id: Listing}. A station with no entries of a kind is
        absent from that dict.

    Raises:
        KeyError: a listing names a station or item missing from the
            mappings.
        ValueError: a line has the wrong number of fields or a non-numeric
            value.
    """
    print("++ Reading listings.csv")

    supplies = {}
    demands = {}
    mismatch = 0
    # Bound up front so the summary below works when there are no data rows.
    rec_no = 0
    stn_count = 0

    start = time.time()
    with open(listings_csv, "rb") as list_fh:
        next(list_fh, None)  # skip header; the default tolerates an empty file
        cur_station = None
        station_id = None
        cur_sup = None
        cur_dem = None
        cur_mod = None

        rows = (line for line in list_fh if line.strip())  # drop blank lines
        for rec_no, line in enumerate(rows, 1):
            if rec_no % 50000 == 1:
                print(f"~~ {rec_no:,}\r", end='')
            # id, station, item, s_units, s_level, s_price, d_price, d_units, d_level, modified
            (_, station_id_text, item_id_text, s_units, s_level, s_price,
             d_price, d_units, d_level, modified) = line.split(b',')
            if station_id_text != cur_station:
                # New group: store what was gathered for the previous
                # station. station_id still holds the previous id here.
                if cur_sup:
                    supplies[station_id] = cur_sup
                cur_sup = {}
                if cur_dem:
                    demands[station_id] = cur_dem
                cur_dem = {}
                cur_station = station_id_text
                station_id = int(cur_station)
                if station_id not in station_ids:
                    raise KeyError(station_id)
                cur_mod = modified
                stn_count += 1
            elif cur_mod != modified:
                # Counts rows whose timestamp differs from the first row
                # of their station group.
                mismatch += 1

            item_id = int(item_id_text)
            item_lid = item_ids[item_id]

            # A price field of exactly b'0' means no entry of that kind.
            if s_price != b'0':
                cur_sup[item_lid] = Listing(int(s_price), int(
                    s_units), int(s_level) + 1, int(modified))
            if d_price != b'0':
                cur_dem[item_lid] = Listing(int(d_price), int(
                    d_units), int(d_level) + 1, int(modified))

        # Store the final group, which no station change followed.
        if cur_station:
            if cur_sup:
                supplies[station_id] = cur_sup
            if cur_dem:
                demands[station_id] = cur_dem

    print(f"-- {len(supplies):,} supply entries, {len(demands):,} demand entries; {rec_no:,} listings over {stn_count:,} stations in {time.time()-start:.2f}s")

    if mismatch:
        print(f".. {mismatch} mismatched dates")

    return supplies, demands


# Load all three tables at import time. The listings reader needs the id
# maps built by the item and station readers, so it has to run last.
items, item_ids, item_lids = read_item_table()
stations, station_ids, station_lids = read_station_table()
supply, demand = read_station_listings(station_ids, item_ids)


def write_listings(ident, listings, station_lids, items, out_fh):
    """Write one fixed-size listing table to out_fh.

    After ident, one block of len(items) * LISTING_LEN bytes is written
    per entry of station_lids, in order. Within a block the record for a
    local item id sits at item_lid * LISTING_LEN; slots with no listing
    stay zero-filled.

    Args:
        ident: section tag written before the table (bytes-like).
        listings: mapping of station id -> {local item id: Listing}.
        station_lids: iterable of station ids, used as keys into listings.
        items: sized collection of all items; only its length is used.
        out_fh: binary file object to write to.

    Raises:
        IndexError: a listing's local item id is outside range(len(items)).
    """
    start = time.time()
    out_fh.write(bytes(ident))
    station_bytes = len(items) * LISTING_LEN
    item_limit = len(items)
    empty_station = bytes(station_bytes)  # shared all-zero block
    item_no = 0
    stn_no = 0  # bound up front so the summary works with no stations
    empty_stations = 0
    pop_stations = 0

    for stn_no, stn in enumerate(station_lids, 1):
        if stn_no % 5000 == 1:
            print(f"~~ stations: {stn_no:,}, listings:{item_no:,}\r", end='')
        stn_listings = listings.get(stn)
        if not stn_listings:
            out_fh.write(empty_station)
            empty_stations += 1
            continue

        data = bytearray(station_bytes)
        for item_lid, listing in stn_listings.items():
            # Slice assignment past the end would silently grow the buffer
            # and shift every later record, so reject bad ids outright.
            if not 0 <= item_lid < item_limit:
                raise IndexError(f"item_lid {item_lid} out of range for {item_limit} items")
            offset = item_lid * LISTING_LEN
            data[offset:offset + LISTING_LEN] = listing.pack()
            item_no += 1
        pop_stations += 1
        out_fh.write(data)
    print(f"-- Saved {item_no:,} items across {stn_no:,} stations ({pop_stations:,} populated, {empty_stations:,} empty) in {time.time()-start:.2f}s")


with open("tradedangerous.data", "wb") as out_fh:
    # File ident line: magic, then the format version as 4 hex digits.
    ident_line = f"{MAGIC}{FORMAT:04X}\n"
    out_fh.write(ident_line.encode())

    # Header section: record count and record size for items, then stations.
    print("<< Headers")
    out_fh.write(b"HDRS")
    item_header = pack("!QI", len(items), ITEM_LEN)
    station_header = pack("!QI", len(stations), STATION_LEN)
    out_fh.write(item_header)
    out_fh.write(station_header)

    # Item records, in the order the items dict yields them.
    print("<< Item List")
    out_fh.write(b"ITLS")
    out_fh.writelines(entry.pack() for entry in items.values())

    # Station records, in the order the stations dict yields them.
    print("<< Station List")
    out_fh.write(b"STLS")
    out_fh.writelines(entry.pack() for entry in stations.values())

    print("<< Supply")
    write_listings(b"STSU", supply, station_lids, items, out_fh)

    print("<< Demand")
    write_listings(b"STDE", demand, station_lids, items, out_fh)

    # Report the final size, in GB above 750 MB and in MB otherwise.
    size = out_fh.tell()
    if size > 750 * 1024 * 1024:
        sized = f"{size / 1024 / 1024 / 1024:,.2f}GB"
    else:
        sized = f"{size / 1024 / 1024:,.2f}MB"
    print(f"-- Data file is {size:,} bytes ({sized})")
	from dataclasses import dataclass
	from datetime import datetime
	from enum import Enum
	from struct import pack, unpack
	import os
	import time

	MAGIC = "SILS" # Station Item Listing
	FORMAT = 1
	EPOCH = 1500000000

	ITEM_FMT = "".join(
	"!" # Use network endianess
	"I" # item_id: u32
	"I" # fdev_id: u32
	"I" # avg_price: u32
	"H" # category_id: u16
	"H" # ui_order: u16
	"40s" # name
	)
	ITEM_LEN = 4 + 4 + 4 + 2 + 2 + 40 # 56
	print("-- Item Struct Len:", ITEM_LEN)
	# At time of writing, there are 400 items in the game,
	# so assuming a maximum of 512 for the time being will
	# allow us to use 8-bytes to represent all of them in
	# an availability-bit-mask.
	ITEM_LIMIT = 512
	ITEM_BYTES = ITEM_LIMIT / 8
	ITEM_U64S = ITEM_BYTES / 8 # 8 = 8 bytes per u64


	@dataclass(slots=True, frozen=True)
	class Item:
	item_id: int # u32
	name: bytes # [40; u8]
	category_id: int # u16
	ui_order: int # u16
	avg_price: int # u32
	fdev_id: int # u32

	def pack(self):
	return pack(ITEM_FMT, self.item_id, self.fdev_id, self.avg_price, self.category_id, self.ui_order, self.name)

	@staticmethod
	def unpack(source: bytes):
	item_id, fdev_id, avg_price, category_id, ui_order, name = unpack(
	ITEM_FMT, source)
	return Item(item_id, name, category_id, ui_order, avg_price, fdev_id)


	class StationFlags(Enum):
	MARKET = 0
	BLACK_MARKET = 1
	SHIPYARD = 2
	OUTFITTING = 3
	REARM = 4
	REFUEL = 5
	REPAIR = 6
	PLANET = 7


	STATION_FMT = "".join(
	"!" # Use network endianess
	"Q" # station_id: u64
	"I" # system_id: u32
	"I" # modified: u32 (minus epoch)
	"f" # jump_ls: float
	"h" # type_id: i16
	"B" # pads: u8
	"B" # known_flags: u8
	"B" # flags: u8
	"40s" # name
	"x" # pad byte
	)
	STATION_LEN = 8 + 4 + 4 + 4 + 2 + 1 + 1 + 1 + 40 + 1
	print("-- Station Struct Len:", STATION_LEN)


	@dataclass(slots=True, frozen=True)
	class Station:
	station_id: int # u32
	name: bytes # [40; u8]
	system_id: int # u64
	jump_ls: float # f32
	known_flags: int # u8
	flags: int # u8
	pads: int # u8
	type_id: int # u16
	modified: int # u32 (minus epoch)
	supply: bytes # [ITEM_BYTES; u8]
	demand: bytes # [ITEM_BYTES; u8]

	def pack(self):
	return pack(STATION_FMT, self.system_id, self.station_id, self.modified - EPOCH, self.jump_ls, self.type_id, self.pads, self.known_flags, self.flags, self.name)

	def populate_availability(self, item_lids, supply, demand):
	self.supply = bytearray(ITEM_BYTES) # one byte per 8 max items
	for item_lid, listing in supply:
	# the entries aren't sequential, calculate the byte and bit
	# this will live in - I actually think it should probably be
	# u64s rather than bytes but bytes are easier in python
	byte_no, bit_no = item_lid >> 3, item_lid & 0x07
	supply_mask[byte_no] \|= (1 << bit_no)

	@staticmethod
	def unpack(source: bytes):
	station_id, system_id, modified, jump_ls, type_id, pads, known_flags, flags, name = unpack(
	STATION_FMT, source)
	modified += EPOCH if modified else 0
	return Station(station_id, name, system_id, jump_ls, known_flags, flags, pads, type_id, modified, bytearray(ITEM_BYTES), bytearray(ITEM_BYTES))


	LISTING_FMT = "".join(
	"!"
	"I" # u32: price
	"I" # u32: units
	"I" # u32: modified (minus epoch)
	"I" # u32: (actually a u8 with padding)
	)
	LISTING_LEN = 4 * 4


	@dataclass(slots=True, frozen=True)
	class Listing:
	price: int # u32
	units: int # u32
	level: int # u8 padd to u32 for convenience
	modified: int # u32 (minus epoch)

	def pack(self):
	modified = self.modified - EPOCH if self.modified else 0
	return pack(LISTING_FMT, self.price, self.units, self.level, modified)

	@staticmethod
	def unpack(source: bytes):
	price, units, level, modified = unpack(ITEM_FMT, source)
	modified = modified + EPOCH if modified else 0
	return Listing(price, units, level, modified)


	item_csv = "Item.csv"
	if not os.path.exists(item_csv):
	item_csv = "data/" + item_csv
	listings_csv = "listings.csv"
	if not os.path.exists(listings_csv):
	listings_csv = "eddb/" + listings_csv
	if not os.path.exists(listings_csv):
	listings_csv = "data/" + listings_csv
	station_csv = "Station.csv"
	if not os.path.exists(station_csv):
	station_csv = "data/" + station_csv


	def read_item_table():
	print("++ Reading Item.csv")

	items = {} # real id -> (name, lid)
	item_ids = {}
	item_lids = [] # local id -> real id

	with open(item_csv, "rb") as item_fh:
	item_lines = iter(item_fh)
	next(item_lines) # skip header
	item_count_off = item_fh.tell()
	for line in item_lines:
	(item_id_text, name, category, ui_order,
	avg_price, fdev_id) = line.split(b',')
	item_id = int(item_id_text)
	item_lid = len(item_lids)
	item = Item(item_id, name[1:-1], int(category),
	int(ui_order), int(avg_price), int(fdev_id))
	items[item_id] = item
	item_ids[item_id] = item_lid
	item_lids += [item_id]

	print(f"-- {len(item_lids):,} items, {min(items):,}-{max(items):,}")

	return items, item_ids, item_lids


	def read_station_table():
	print("++ Reading Station.csv")

	stations = {} # real id -> name
	station_ids = {}
	station_lids = [] # local id -> real id

	max_pad_sizes = b"SML"

	with open(station_csv, "rb") as station_fh:
	station_lines = iter(station_fh)
	next(station_lines)
	for line in station_lines:
	(station_id_text, name, system, jump_ls, blackmarket, max_pad, market, shipyard,
	modified, outfitting, rearm, refuel, repair, planetary, type_id) = line.split(b',')
	station_id = int(station_id_text)
	known_flags, flags = 0, 0
	for bit, value in enumerate((market, blackmarket, shipyard, outfitting, rearm, refuel, repair, planetary)):
	if value == b'Y':
	known_flags \|= (1 << bit)
	flags \|= (1 << bit)
	elif value == b'N':
	known_flags \|= (1 << bit)

	max_pad_size = max_pad_sizes.find(max_pad) + 1 # 0-based

	if modified.startswith(b"'"): # string format, need to parse it.
	modified = datetime.fromisoformat(
	modified[1:-1].decode('ascii')).timestamp()
	modified = int(modified)

	station = Station(station_id, name[1:-1], int(system), float(
	jump_ls), known_flags, flags, max_pad_size, int(type_id), modified, b"", b"")
	station_lid = len(station_lids)
	stations[station_id] = station
	station_ids[station_id] = station_lid
	station_lids += [station_id]

	print(f"-- {len(station_lids):,} stations, {min(stations):,}-{max(stations):,}")

	return stations, station_ids, station_lids


	def read_station_listings(station_ids, item_ids):
	print("++ Reading listings.csv")

	supplies = {}
	demands = {}
	mismatch = 0

	start = time.time()
	with open(listings_csv, "rb") as list_fh:
	listings = iter(list_fh)
	next(listings)
	cur_station = None
	station_id = None
	station_lid = None
	cur_sup = None
	cur_dem = None
	cur_mod = None
	stn_count = 0

	for rec_no, line in enumerate(listings, 1):
	if rec_no % 50000 == 1:
	print(f"~~ {rec_no:,}\r", end='')
	# id, station, item, s_units, s_level, s_price, d_price, d_units, d_level, modified
	(_, station_id_text, item_id_text, s_units, s_level, s_price,
	d_price, d_units, d_level, modified) = line.split(b',')
	if station_id_text != cur_station:
	if cur_sup:
	supplies[station_id] = cur_sup
	cur_sup = {}
	if cur_dem:
	demands[station_id] = cur_dem
	cur_dem = {}
	cur_station = station_id_text
	station_id = int(cur_station)
	station_lid = station_ids[station_id]
	cur_mod = modified
	stn_count += 1
	elif cur_mod != modified:
	mismatch += 1

	item_id = int(item_id_text)
	item_lid = item_ids[item_id]

	if s_price != b'0':
	cur_sup[item_lid] = Listing(int(s_price), int(
	s_units), int(s_level) + 1, int(modified))
	if d_price != b'0':
	cur_dem[item_lid] = Listing(int(d_price), int(
	d_units), int(d_level) + 1, int(modified))

	if cur_station:
	if cur_sup:
	supplies[station_id] = cur_sup
	if cur_dem:
	demands[station_id] = cur_dem

	print(f"-- {len(supplies):,} supply entries, {len(demands):,} demand entries; {rec_no:,} listings over {stn_count:,} stations in {time.time()-start:.2f}s")

	if mismatch:
	print(f".. {mismatch} mismatched dates")

	return supplies, demands


	items, item_ids, item_lids = read_item_table()
	stations, station_ids, station_lids = read_station_table()
	supply, demand = read_station_listings(station_ids, item_ids)


	def write_listings(ident, listings, station_lids, items, out_fh):
	start = time.time()
	out_fh.write(bytes(ident))
	get = listings.get
	item_no = 0
	start_pos = out_fh.tell()
	station_bytes = len(items) * LISTING_LEN
	total_bytes = len(station_ids) * station_bytes
	empty_station = bytearray(station_bytes)
	empty_stations = 0
	pop_stations = 0

	for stn_no, stn in enumerate(station_lids, 1):
	if stn_no % 5000 == 1:
	print(f"~~ stations: {stn_no:,}, listings:{item_no:,}\r", end='')
	stn_listings = get(stn) or {}
	if not stn_listings:
	out_fh.write(empty_station)
	empty_stations += 1
	continue

	data = bytearray(len(items) * LISTING_LEN)
	# TODO: fill with zeros then populate the appropriate block
	for item_lid, listing in stn_listings.items():
	offset = item_lid * LISTING_LEN
	data[offset:offset + LISTING_LEN] = listing.pack()
	item_no += 1
	pop_stations += 1
	out_fh.write(data)
	print(f"-- Saved {item_no:,} items across {stn_no:,} stations ({pop_stations:,} populated, {empty_stations:,} empty) in {time.time()-start:.2f}s")


	with open("tradedangerous.data", "wb") as out_fh:
	# File type and version ident
	out_fh.write(f"{MAGIC}{FORMAT:04X}\n".encode())

	# Remember where we're going to store the item and station counts and sizes
	print("<< Headers")
	out_fh.write(b"HDRS")
	out_fh.write(pack("!QI", len(items), ITEM_LEN))
	out_fh.write(pack("!QI", len(stations), STATION_LEN))

	print("<< Item List")
	out_fh.write(b"ITLS")
	for item in items.values():
	out_fh.write(item.pack())

	print("<< Station List")
	out_fh.write(b"STLS")
	for stn in stations.values():
	out_fh.write(stn.pack())

	print("<< Supply")
	write_listings(b"STSU", supply, station_lids, items, out_fh)

	print("<< Demand")
	write_listings(b"STDE", demand, station_lids, items, out_fh)

	size = out_fh.tell()
	sized = f"{size / 1024 / 1024 / 1024:,.2f}GB" if size > 750 * \
	1024 * 1024 else f"{size / 1024 / 1024:,.2f}MB"
	print(f"-- Data file is {size:,} bytes ({sized})")