Wheest/save_tensors.py Secret

## save_tensors.py
#!/usr/bin/env python3

"""
Save TVM tensors as .dat files which we can read later.
This is useful for debugging and testing.
Ideally, my forked version of TVM should generate these files for us.
"""

import re
import struct
import argparse
import pathlib

# Directory that receives the generated .dat/.csv files. It is created as a
# side effect of importing/running this module; parent directories are not
# created (mkdir is called without parents=True).
DATA_DIR = pathlib.Path("./testdata/")
DATA_DIR.mkdir(exist_ok=True)


# Function to convert hex floats to binary and write them to .dat files and save the CSV decimal version
def process_array(name, values, data_dir=None):
    """Decode hex-float strings and save them as ``<name>.dat`` and ``<name>.csv``.

    Args:
        name: Base file name (without extension) used for both output files.
        values: Iterable of strings in the format accepted by
            ``float.fromhex`` (e.g. ``"0x1.8p1"``).
        data_dir: Directory to write into. When omitted, the module-level
            ``DATA_DIR`` is used. The directory is created if it is missing.

    Raises:
        ValueError: If an entry of ``values`` is not a valid hex float.
        OverflowError: If a decoded value does not fit in a 32-bit float.
    """
    decoded_values = [float.fromhex(v) for v in values]

    # Only fall back to the global when no explicit directory was given.
    out_dir = pathlib.Path(DATA_DIR if data_dir is None else data_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # .dat file: the values packed back-to-back as native-endian 32-bit floats.
    binary_data = struct.pack(f"{len(decoded_values)}f", *decoded_values)
    (out_dir / f"{name}.dat").write_bytes(binary_data)

    # .csv file: a single comma-separated line without a trailing newline.
    # The text is written in one call; passing a str to writelines() would
    # iterate it one character at a time.
    (out_dir / f"{name}.csv").write_text(",".join(map(str, decoded_values)))


# Function to read the array data from a text file
def read_arrays_from_file(file_path):
    """Extract float arrays from a C source file and hand each to ``process_array``.

    Two passes are made over the file's text:

    1. Declarations of the form ``float NAME[N] __attribute__((...)); // ...``
       whose trailing comment contains ``offset: K`` register ``NAME`` together
       with its offset (the offset is stored but not used any further here).
    2. Initialisers of the form ``.NAME = { v0, v1, ...`` supply the value
       tokens for a previously registered ``NAME``; initialisers for unknown
       names are ignored.

    Every registered array is then passed to ``process_array``, including
    arrays for which no initialiser was found (their value list is empty).

    Args:
        file_path: Path of the text file to scan.
    """
    declaration_re = re.compile(
        r"\s*float\s+(\w+)\s*\[\d+\]\s*__attribute__\s*\(\(.*?\)\);\s*//.*?,.*?offset:.*?(\d+)"
    )
    initializer_re = re.compile(
        r"\s*\.\s*(\w+)\s*=\s*\{\s*([-+0-9.xabcdefp\s]+(?:(?:,\s*)?[-+0-9.xabcdefp\s]+)*)"
    )

    with open(file_path, "r") as source:
        text = source.read()

    # Pass 1: register every declared array, keyed by name, in file order.
    arrays = {
        decl.group(1): {"offset": int(decl.group(2)), "values": []}
        for decl in declaration_re.finditer(text)
    }

    # Pass 2: attach the value tokens to the arrays registered above.
    for init in initializer_re.finditer(text):
        entry = arrays.get(init.group(1))
        if entry is None:
            continue
        body = init.group(2).strip()
        entry["values"] = re.findall(r"[-+0-9.xabcdefp]+", body)

    # Write out each array.
    for array_name, entry in arrays.items():
        process_array(array_name, entry["values"])


def _cli_input_path():
    """Parse the command line and return the value given for ``--input_file``."""
    cli = argparse.ArgumentParser(
        description="Convert C-Style array init to separate .dat files"
    )
    cli.add_argument(
        "--input_file",
        default="./tvm_model/codegen/host/src/default_lib0.c",
        type=str,
        help="Path to the input file containing C-style array initializations",
    )
    return cli.parse_args().input_file


if __name__ == "__main__":
    # Script entry point: convert the arrays found in the chosen input file.
    read_arrays_from_file(_cli_input_path())
	#!/usr/bin/env python3

	"""
	Save TVM tensors as .dat files which we can read later.
	This is useful for debugging and testing.
	Ideally, my forked version of TVM should generate these files for us.
	"""

	import re
	import struct
	import argparse
	import pathlib

	# Directory that receives the generated .dat/.csv files. It is created as a
	# side effect of importing/running this module; parent directories are not
	# created (mkdir is called without parents=True).
	DATA_DIR = pathlib.Path("./testdata/")
	DATA_DIR.mkdir(exist_ok=True)


	# Function to convert hex floats to binary and write them to .dat files and save the CSV decimal version
	def process_array(name, values, data_dir=None):
	    """Decode hex-float strings and save them as ``<name>.dat`` and ``<name>.csv``.

	    Args:
	        name: Base file name (without extension) used for both output files.
	        values: Iterable of strings in the format accepted by
	            ``float.fromhex`` (e.g. ``"0x1.8p1"``).
	        data_dir: Directory to write into. When omitted, the module-level
	            ``DATA_DIR`` is used. The directory is created if it is missing.

	    Raises:
	        ValueError: If an entry of ``values`` is not a valid hex float.
	        OverflowError: If a decoded value does not fit in a 32-bit float.
	    """
	    decoded_values = [float.fromhex(v) for v in values]

	    # Only fall back to the global when no explicit directory was given.
	    out_dir = pathlib.Path(DATA_DIR if data_dir is None else data_dir)
	    out_dir.mkdir(parents=True, exist_ok=True)

	    # .dat file: the values packed back-to-back as native-endian 32-bit floats.
	    binary_data = struct.pack(f"{len(decoded_values)}f", *decoded_values)
	    (out_dir / f"{name}.dat").write_bytes(binary_data)

	    # .csv file: a single comma-separated line without a trailing newline.
	    # The text is written in one call; passing a str to writelines() would
	    # iterate it one character at a time.
	    (out_dir / f"{name}.csv").write_text(",".join(map(str, decoded_values)))


	# Function to read the array data from a text file
	def read_arrays_from_file(file_path):
	    """Extract float arrays from a C source file and hand each to ``process_array``.

	    Declarations of the form ``float NAME[N] __attribute__((...)); // ...``
	    whose trailing comment contains ``offset: K`` register ``NAME`` with its
	    offset (stored, but not used any further here). Initialisers of the form
	    ``.NAME = { v0, v1, ...`` then supply the value tokens for a registered
	    ``NAME``; initialisers for unknown names are ignored. Every registered
	    array is finally passed to ``process_array``, including arrays for which
	    no initialiser was found (their value list is empty).

	    Args:
	        file_path: Path of the text file to scan.
	    """
	    # The "*" quantifiers are required: without them each "\s" demands exactly
	    # one whitespace character and ".?" matches at most one character, so
	    # realistic declarations and initialisers would not match.
	    array_pattern = re.compile(
	        r"\s*float\s+(\w+)\s*\[\d+\]\s*__attribute__\s*\(\(.*?\)\);\s*//.*?,.*?offset:.*?(\d+)"
	    )
	    values_pattern = re.compile(
	        r"\s*\.\s*(\w+)\s*=\s*\{\s*([-+0-9.xabcdefp\s]+(?:(?:,\s*)?[-+0-9.xabcdefp\s]+)*)"
	    )
	    array_data = {}  # Array name -> {"offset": int, "values": [str, ...]}

	    # Read the whole file; both patterns are matched against the full text.
	    with open(file_path, "r") as file:
	        content = file.read()

	    # Find all array declarations along with their offsets.
	    for match in array_pattern.finditer(content):
	        name, offset = match.groups()
	        array_data[name] = {"offset": int(offset), "values": []}

	    # Find all value lists and associate them with their arrays.
	    for match in values_pattern.finditer(content):
	        name, values_section = match.groups()
	        if name in array_data:
	            # Split the captured initialiser body into individual tokens.
	            array_data[name]["values"] = re.findall(
	                r"[-+0-9.xabcdefp]+", values_section.strip()
	            )

	    # Write out each array.
	    for name, data in array_data.items():
	        process_array(name, data["values"])


	if __name__ == "__main__":
	    # Script entry point: parse --input_file and convert the arrays it contains.
	    parser = argparse.ArgumentParser(
	        description="Convert C-Style array init to separate .dat files"
	    )
	    parser.add_argument(
	        "--input_file",
	        type=str,
	        default="./tvm_model/codegen/host/src/default_lib0.c",
	        help="Path to the input file containing C-style array initializations",
	    )
	    args = parser.parse_args()

	    read_arrays_from_file(args.input_file)