Inspect marian-file format
import struct
from argparse import ArgumentParser


class Bijective:
    """A two-way mapping between type names and their numeric codes."""

    def __init__(self, forward):
        self.forward = forward
        self.backward = {value: key for key, value in self.forward.items()}

    def get(self, key):
        return self.forward.get(key, None)

    def inverse(self, key):
        return self.backward.get(key, None)
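

# For example, TYPE below maps names to ids and back:
#   TYPE.get("float32")  == 1028   (0x0400 float_type + element size 4)
#   TYPE.inverse(1028)   == "float32"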
# fmt: off
TYPE_CLASS = Bijective({
    "signed_type"   : int("0x0100", 16),
    "unsigned_type" : int("0x0200", 16),
    "float_type"    : int("0x0400", 16),
    "packed_type"   : int("0x0800", 16),  # special packed (CPU cache friendly) type class, used in FBGEMM. Annoyingly we need to keep 0x800 for back-compat; would be nicer to align with intgemm.
    "avx2_type"     : int("0x1000", 16),  # processor-specific layout for AVX2, currently used for FBGEMM only (keep 0x1000 for back-compat)
    "avx512_type"   : int("0x2000", 16),  # processor-specific layout for AVX512, currently used for FBGEMM only (keep 0x2000 for back-compat)
    "intgemm_type"  : int("0x4000", 16),  # intgemm quantized, architecture-agnostic models
    "size_mask"     : int("0x00FF", 16),  # maximum allowed element size is 256 bytes right now; if more is required, extend the size field
    "class_mask"    : int("0xFF00", 16),  # the high byte holds the type-class flags; if more classes are added, this field needs to widen
})

TYPE = Bijective({
    "int8"          : TYPE_CLASS.get("signed_type") + 1,    # int8 type
    "int16"         : TYPE_CLASS.get("signed_type") + 2,    # int16 type
    "int32"         : TYPE_CLASS.get("signed_type") + 4,    # int32 type
    "int64"         : TYPE_CLASS.get("signed_type") + 8,    # int64 type
    "uint8"         : TYPE_CLASS.get("unsigned_type") + 1,  # uint8 type
    "uint16"        : TYPE_CLASS.get("unsigned_type") + 2,  # uint16 type
    "uint32"        : TYPE_CLASS.get("unsigned_type") + 4,  # uint32 type
    "uint64"        : TYPE_CLASS.get("unsigned_type") + 8,  # uint64 type
    "float16"       : TYPE_CLASS.get("float_type") + 2,     # float16 type
    "float32"       : TYPE_CLASS.get("float_type") + 4,     # float32 type
    "float64"       : TYPE_CLASS.get("float_type") + 8,     # float64 type
    "packed16"      : TYPE_CLASS.get("packed_type") + 2,                                   # special type for FBGEMM, not meant to be used anywhere else, not meant to be accessed individually. Internal actual type (uint16) is meaningless.
    "packed8avx2"   : TYPE_CLASS.get("packed_type") + 1 + TYPE_CLASS.get("avx2_type"),     # special type for FBGEMM with AVX2, not meant to be used anywhere else, not meant to be accessed individually. Internal actual type (uint8) is meaningless.
    "packed8avx512" : TYPE_CLASS.get("packed_type") + 1 + TYPE_CLASS.get("avx512_type"),   # special type for FBGEMM with AVX512, not meant to be used anywhere else, not meant to be accessed individually. Internal actual type (uint8) is meaningless.
    "intgemm8"      : TYPE_CLASS.get("signed_type") + 1 + TYPE_CLASS.get("intgemm_type"),  # int8 quantized (not packed) matrices for intgemm
    "intgemm16"     : TYPE_CLASS.get("signed_type") + 2 + TYPE_CLASS.get("intgemm_type"),  # int16 quantized (not packed) matrices for intgemm
})
# fmt: on
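

# Illustrative helper (not in the original gist): a type id packs the element
# size into the low byte and the class flags into the high byte, so the two
# masks above are enough to take it apart again.
def describe_type(type_id):
    size = type_id & TYPE_CLASS.get("size_mask")  # element size in bytes
    classes = [
        name
        for name, bit in TYPE_CLASS.forward.items()
        if name.endswith("_type") and type_id & bit
    ]
    return size, classes


# e.g. describe_type(TYPE.get("intgemm8")) == (1, ["signed_type", "intgemm_type"])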
class Header:
    """Per-tensor header: four little-endian uint64 fields."""

    def __init__(self, name_len, type_id, shape_len, data_len):
        self.name_len = name_len
        self.type = type_id
        self.shape_len = shape_len
        self.data_len = data_len

    def __repr__(self):
        return f"Header(name={self.name_len}, type={self.type}, shape={self.shape_len}, data={self.data_len})"
class Reader:
    """Sequential reader over the little-endian binary model file."""

    def __init__(self, fp):
        self.fp = fp

    def uint64(self):
        buffer = self.fp.read(8)
        arg, *_ = struct.unpack("<Q", buffer)
        return arg

    def int32(self):
        buffer = self.fp.read(4)
        arg, *_ = struct.unpack("<i", buffer)
        return arg

    def char(self):
        buffer = self.fp.read(1)
        byte, *_ = struct.unpack("<c", buffer)
        return byte.decode("utf-8")

    def string(self, length):
        binary = self.fp.read(length)
        # Names are stored null-terminated; drop the trailing '\0'.
        return binary.decode("utf-8")[:-1]

    def bytes(self, length):
        return self.fp.read(length)

    def header(self):
        num_fields = 4  # name_len, type, shape_len, data_len
        args = [self.uint64() for _ in range(num_fields)]
        return Header(*args)

    def shape(self, length):
        return [self.int32() for _ in range(length)]
class Item:
    """A named tensor: type, shape, and raw (still encoded) bytes."""

    def __init__(self, type_id):
        self.name = None
        self.shape = None
        self.data = None
        self.type = type_id

    def set_shape(self, shape):
        self.shape = shape

    def set_data(self, data):
        self.data = data

    def set_name(self, name):
        self.name = name

    def __repr__(self):
        data_len = len(self.data)
        typename = TYPE.inverse(self.type) or f"UnknownType[{self.type}]"
        return f"Item({self.name}, {typename}, Shape({self.shape}), {data_len})"
def parse(model_path):
    with open(model_path, "rb") as fp:
        reader = Reader(fp)
        version = reader.uint64()
        num_headers = reader.uint64()
        headers = []
        for _ in range(num_headers):
            header = reader.header()
            headers.append(header)

        items = [Item(header.type) for header in headers]

        # All names come first, in header order...
        for header, item in zip(headers, items):
            name = reader.string(header.name_len)
            item.set_name(name)

        # ...then all shapes.
        for header, item in zip(headers, items):
            shape = reader.shape(header.shape_len)
            item.set_shape(shape)

        # To align the data section to 256 bytes, some slack is left. Its
        # length is stored in the next uint64; read that, then skip as many
        # bytes.
        distance_to_256_aligned = reader.uint64()
        reader.bytes(distance_to_256_aligned)  # discard the slack

        # Finally, the raw data blobs.
        for header, item in zip(headers, items):
            data = reader.bytes(header.data_len)
            item.set_data(data)

        config_name = "special:model.yml"
        for item in items:
            print(item)
            # The model's YAML config travels as a null-terminated pseudo-tensor.
            if item.name == config_name:
                config = item.data.decode("utf-8")[:-1]
                print(config)

        return items
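

# Illustrative sketch (not part of the original gist): view a float32 item's
# payload as a numpy array. numpy is an assumed extra dependency; quantized
# intgemm/packed payloads need format-specific decoding and are not handled.
def to_numpy(item):
    import numpy as np

    assert TYPE.inverse(item.type) == "float32", "sketch handles float32 only"
    return np.frombuffer(item.data, dtype="<f4").reshape(item.shape)


# Usage, given the return value added to parse() above:
#   items = parse(model_path)
#   bias = to_numpy(items[2])  # decoder_ff_logit_out_b in the run below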
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--model-path", type=str, required=True)
    args = parser.parse_args()
    parse(args.model_path)
[jerin@vty build]$ python3 ../inspect_marian_model.py --model-path $EN_DE_MODEL_PATH
{'avx2_type': 4096,
 'avx512_type': 8192,
 'class_mask': 65280,
 'float_type': 1024,
 'intgemm_type': 16384,
 'packed_type': 2048,
 'signed_type': 256,
 'size_mask': 255,
 'unsigned_type': 512}
{'float16': 1026,
 'float32': 1028,
 'float64': 1032,
 'int16': 258,
 'int32': 260,
 'int64': 264,
 'int8': 257,
 'intgemm16': 16642,
 'intgemm8': 16641,
 'packed16': 2050,
 'packed8avx2': 6145,
 'packed8avx512': 10241,
 'uint16': 514,
 'uint32': 516,
 'uint64': 520,
 'uint8': 513}
Bit(i1=1, i2=2, i4=4, i8=8, i4096=4096)
Item(Wemb, intgemm8, Shape([32000, 256]), 8192256)
Item(Wemb_QuantMultA, intgemm8, Shape([1, 1]), 256)
Item(decoder_ff_logit_out_b, float32, Shape([1, 32000]), 128000)
Item(decoder_l1_context_Wk, intgemm8, Shape([256, 256]), 65792)
Item(decoder_l1_context_Wk_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l1_context_Wo, intgemm8, Shape([256, 256]), 65792)
Item(decoder_l1_context_Wo_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l1_context_Wo_ln_bias, float32, Shape([1, 256]), 1024)
Item(decoder_l1_context_Wo_ln_scale, float32, Shape([1, 256]), 1024)
Item(decoder_l1_context_Wq, intgemm8, Shape([256, 256]), 65792)
Item(decoder_l1_context_Wq_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l1_context_Wv, intgemm8, Shape([256, 256]), 65792)
Item(decoder_l1_context_Wv_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l1_context_bk, float32, Shape([1, 256]), 1024)
Item(decoder_l1_context_bo, float32, Shape([1, 256]), 1024)
Item(decoder_l1_context_bq, float32, Shape([1, 256]), 1024)
Item(decoder_l1_context_bv, float32, Shape([1, 256]), 1024)
Item(decoder_l1_ffn_W1, intgemm8, Shape([256, 1536]), 393472)
Item(decoder_l1_ffn_W1_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l1_ffn_W2, intgemm8, Shape([1536, 256]), 393472)
Item(decoder_l1_ffn_W2_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l1_ffn_b1, float32, Shape([1, 1536]), 6144)
Item(decoder_l1_ffn_b2, float32, Shape([1, 256]), 1024)
Item(decoder_l1_ffn_ffn_ln_bias, float32, Shape([1, 256]), 1024)
Item(decoder_l1_ffn_ffn_ln_scale, float32, Shape([1, 256]), 1024)
Item(decoder_l1_rnn_W, intgemm8, Shape([256, 256]), 65792)
Item(decoder_l1_rnn_W_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l1_rnn_Wf, intgemm8, Shape([256, 256]), 65792)
Item(decoder_l1_rnn_Wf_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l1_rnn_bf, float32, Shape([1, 256]), 1024)
Item(decoder_l1_rnn_ffn_ln_bias, float32, Shape([1, 256]), 1024)
Item(decoder_l1_rnn_ffn_ln_scale, float32, Shape([1, 256]), 1024)
Item(decoder_l2_context_Wk, intgemm8, Shape([256, 256]), 65792)
Item(decoder_l2_context_Wk_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l2_context_Wo, intgemm8, Shape([256, 256]), 65792)
Item(decoder_l2_context_Wo_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l2_context_Wo_ln_bias, float32, Shape([1, 256]), 1024)
Item(decoder_l2_context_Wo_ln_scale, float32, Shape([1, 256]), 1024)
Item(decoder_l2_context_Wq, intgemm8, Shape([256, 256]), 65792)
Item(decoder_l2_context_Wq_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l2_context_Wv, intgemm8, Shape([256, 256]), 65792)
Item(decoder_l2_context_Wv_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l2_context_bk, float32, Shape([1, 256]), 1024)
Item(decoder_l2_context_bo, float32, Shape([1, 256]), 1024)
Item(decoder_l2_context_bq, float32, Shape([1, 256]), 1024)
Item(decoder_l2_context_bv, float32, Shape([1, 256]), 1024)
Item(decoder_l2_ffn_W1, intgemm8, Shape([256, 1536]), 393472)
Item(decoder_l2_ffn_W1_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l2_ffn_W2, intgemm8, Shape([1536, 256]), 393472)
Item(decoder_l2_ffn_W2_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l2_ffn_b1, float32, Shape([1, 1536]), 6144)
Item(decoder_l2_ffn_b2, float32, Shape([1, 256]), 1024)
Item(decoder_l2_ffn_ffn_ln_bias, float32, Shape([1, 256]), 1024)
Item(decoder_l2_ffn_ffn_ln_scale, float32, Shape([1, 256]), 1024)
Item(decoder_l2_rnn_W, intgemm8, Shape([256, 256]), 65792)
Item(decoder_l2_rnn_W_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l2_rnn_Wf, intgemm8, Shape([256, 256]), 65792)
Item(decoder_l2_rnn_Wf_QuantMultA, float32, Shape([1, 1]), 256)
Item(decoder_l2_rnn_bf, float32, Shape([1, 256]), 1024)
Item(decoder_l2_rnn_ffn_ln_bias, float32, Shape([1, 256]), 1024)
Item(decoder_l2_rnn_ffn_ln_scale, float32, Shape([1, 256]), 1024)
Item(encoder_l1_ffn_W1, intgemm8, Shape([256, 1536]), 393472)
Item(encoder_l1_ffn_W1_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l1_ffn_W2, intgemm8, Shape([1536, 256]), 393472)
Item(encoder_l1_ffn_W2_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l1_ffn_b1, float32, Shape([1, 1536]), 6144)
Item(encoder_l1_ffn_b2, float32, Shape([1, 256]), 1024)
Item(encoder_l1_ffn_ffn_ln_bias, float32, Shape([1, 256]), 1024)
Item(encoder_l1_ffn_ffn_ln_scale, float32, Shape([1, 256]), 1024)
Item(encoder_l1_self_Wk, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l1_self_Wk_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l1_self_Wo, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l1_self_Wo_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l1_self_Wo_ln_bias, float32, Shape([1, 256]), 1024)
Item(encoder_l1_self_Wo_ln_scale, float32, Shape([1, 256]), 1024)
Item(encoder_l1_self_Wq, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l1_self_Wq_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l1_self_Wv, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l1_self_Wv_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l1_self_bk, float32, Shape([1, 256]), 1024)
Item(encoder_l1_self_bo, float32, Shape([1, 256]), 1024)
Item(encoder_l1_self_bq, float32, Shape([1, 256]), 1024)
Item(encoder_l1_self_bv, float32, Shape([1, 256]), 1024)
Item(encoder_l2_ffn_W1, intgemm8, Shape([256, 1536]), 393472)
Item(encoder_l2_ffn_W1_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l2_ffn_W2, intgemm8, Shape([1536, 256]), 393472)
Item(encoder_l2_ffn_W2_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l2_ffn_b1, float32, Shape([1, 1536]), 6144)
Item(encoder_l2_ffn_b2, float32, Shape([1, 256]), 1024)
Item(encoder_l2_ffn_ffn_ln_bias, float32, Shape([1, 256]), 1024)
Item(encoder_l2_ffn_ffn_ln_scale, float32, Shape([1, 256]), 1024)
Item(encoder_l2_self_Wk, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l2_self_Wk_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l2_self_Wo, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l2_self_Wo_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l2_self_Wo_ln_bias, float32, Shape([1, 256]), 1024)
Item(encoder_l2_self_Wo_ln_scale, float32, Shape([1, 256]), 1024)
Item(encoder_l2_self_Wq, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l2_self_Wq_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l2_self_Wv, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l2_self_Wv_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l2_self_bk, float32, Shape([1, 256]), 1024)
Item(encoder_l2_self_bo, float32, Shape([1, 256]), 1024)
Item(encoder_l2_self_bq, float32, Shape([1, 256]), 1024)
Item(encoder_l2_self_bv, float32, Shape([1, 256]), 1024)
Item(encoder_l3_ffn_W1, intgemm8, Shape([256, 1536]), 393472)
Item(encoder_l3_ffn_W1_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l3_ffn_W2, intgemm8, Shape([1536, 256]), 393472)
Item(encoder_l3_ffn_W2_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l3_ffn_b1, float32, Shape([1, 1536]), 6144)
Item(encoder_l3_ffn_b2, float32, Shape([1, 256]), 1024)
Item(encoder_l3_ffn_ffn_ln_bias, float32, Shape([1, 256]), 1024)
Item(encoder_l3_ffn_ffn_ln_scale, float32, Shape([1, 256]), 1024)
Item(encoder_l3_self_Wk, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l3_self_Wk_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l3_self_Wo, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l3_self_Wo_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l3_self_Wo_ln_bias, float32, Shape([1, 256]), 1024)
Item(encoder_l3_self_Wo_ln_scale, float32, Shape([1, 256]), 1024)
Item(encoder_l3_self_Wq, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l3_self_Wq_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l3_self_Wv, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l3_self_Wv_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l3_self_bk, float32, Shape([1, 256]), 1024)
Item(encoder_l3_self_bo, float32, Shape([1, 256]), 1024)
Item(encoder_l3_self_bq, float32, Shape([1, 256]), 1024)
Item(encoder_l3_self_bv, float32, Shape([1, 256]), 1024)
Item(encoder_l4_ffn_W1, intgemm8, Shape([256, 1536]), 393472)
Item(encoder_l4_ffn_W1_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l4_ffn_W2, intgemm8, Shape([1536, 256]), 393472)
Item(encoder_l4_ffn_W2_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l4_ffn_b1, float32, Shape([1, 1536]), 6144)
Item(encoder_l4_ffn_b2, float32, Shape([1, 256]), 1024)
Item(encoder_l4_ffn_ffn_ln_bias, float32, Shape([1, 256]), 1024)
Item(encoder_l4_ffn_ffn_ln_scale, float32, Shape([1, 256]), 1024)
Item(encoder_l4_self_Wk, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l4_self_Wk_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l4_self_Wo, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l4_self_Wo_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l4_self_Wo_ln_bias, float32, Shape([1, 256]), 1024)
Item(encoder_l4_self_Wo_ln_scale, float32, Shape([1, 256]), 1024)
Item(encoder_l4_self_Wq, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l4_self_Wq_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l4_self_Wv, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l4_self_Wv_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l4_self_bk, float32, Shape([1, 256]), 1024)
Item(encoder_l4_self_bo, float32, Shape([1, 256]), 1024)
Item(encoder_l4_self_bq, float32, Shape([1, 256]), 1024)
Item(encoder_l4_self_bv, float32, Shape([1, 256]), 1024)
Item(encoder_l5_ffn_W1, intgemm8, Shape([256, 1536]), 393472)
Item(encoder_l5_ffn_W1_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l5_ffn_W2, intgemm8, Shape([1536, 256]), 393472)
Item(encoder_l5_ffn_W2_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l5_ffn_b1, float32, Shape([1, 1536]), 6144)
Item(encoder_l5_ffn_b2, float32, Shape([1, 256]), 1024)
Item(encoder_l5_ffn_ffn_ln_bias, float32, Shape([1, 256]), 1024)
Item(encoder_l5_ffn_ffn_ln_scale, float32, Shape([1, 256]), 1024)
Item(encoder_l5_self_Wk, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l5_self_Wk_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l5_self_Wo, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l5_self_Wo_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l5_self_Wo_ln_bias, float32, Shape([1, 256]), 1024)
Item(encoder_l5_self_Wo_ln_scale, float32, Shape([1, 256]), 1024)
Item(encoder_l5_self_Wq, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l5_self_Wq_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l5_self_Wv, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l5_self_Wv_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l5_self_bk, float32, Shape([1, 256]), 1024)
Item(encoder_l5_self_bo, float32, Shape([1, 256]), 1024)
Item(encoder_l5_self_bq, float32, Shape([1, 256]), 1024)
Item(encoder_l5_self_bv, float32, Shape([1, 256]), 1024)
Item(encoder_l6_ffn_W1, intgemm8, Shape([256, 1536]), 393472)
Item(encoder_l6_ffn_W1_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l6_ffn_W2, intgemm8, Shape([1536, 256]), 393472)
Item(encoder_l6_ffn_W2_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l6_ffn_b1, float32, Shape([1, 1536]), 6144)
Item(encoder_l6_ffn_b2, float32, Shape([1, 256]), 1024)
Item(encoder_l6_ffn_ffn_ln_bias, float32, Shape([1, 256]), 1024)
Item(encoder_l6_ffn_ffn_ln_scale, float32, Shape([1, 256]), 1024)
Item(encoder_l6_self_Wk, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l6_self_Wk_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l6_self_Wo, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l6_self_Wo_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l6_self_Wo_ln_bias, float32, Shape([1, 256]), 1024)
Item(encoder_l6_self_Wo_ln_scale, float32, Shape([1, 256]), 1024)
Item(encoder_l6_self_Wq, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l6_self_Wq_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l6_self_Wv, intgemm8, Shape([256, 256]), 65792)
Item(encoder_l6_self_Wv_QuantMultA, float32, Shape([1, 1]), 256)
Item(encoder_l6_self_bk, float32, Shape([1, 256]), 1024)
Item(encoder_l6_self_bo, float32, Shape([1, 256]), 1024)
Item(encoder_l6_self_bq, float32, Shape([1, 256]), 1024)
Item(encoder_l6_self_bv, float32, Shape([1, 256]), 1024)
Item(none_QuantMultA, float32, Shape([1, 1]), 256)
Item(special:model.yml, int8, Shape([1042]), 1042)